
    Lg&                         S SK r S SKJr  S SKJrJrJrJr  S SKJr	  S SK
Jr  S SKJr   " S S5      rS r1 S	kr  SS
 jr  SS jrSS jrg)    N)_pandas_api)CodecTableconcat_tablesschema)_feather)FeatherErrorc                   <    \ rS rSrSrS	S jrS
S jrS rSS jrSr	g)FeatherDataset   z
Encapsulates details of reading a list of Feather files.

Parameters
----------
path_or_paths : List[str]
    A list of file names
validate_schema : bool, default True
    Check that individual file schemas are all the same / compatible
c                     Xl         X l        g N)pathsvalidate_schema)selfpath_or_pathsr   s      /lib/python3.12/site-packages/pyarrow/feather.py__init__FeatherDataset.__init__)   s    "
.    Nc                 D   [        U R                  S   US9nU/U l        UR                  U l        U R                  SS  HI  n[        X1S9nU R                  (       a  U R                  X45        U R                  R                  U5        MK     [        U R                  5      $ )z
Read multiple feather files as a single pyarrow.Table

Parameters
----------
columns : List[str]
    Names of columns to read from the file

Returns
-------
pyarrow.Table
    Content of the file as a table (of columns)
r   columns   N)
read_tabler   _tablesr   r   validate_schemasappendr   )r   r   _filpathtables        r   r   FeatherDataset.read_table-   s     $**Q-9vkkJJqrNDt5E##%%d2LL&	 #
 T\\**r   c                     U R                   R                  UR                   5      (       d/  [        SR                  XR                   UR                   5      5      eg )Nz-Schema in {!s} was different. 
{!s}

vs

{!s})r   equals
ValueErrorformat)r   piecer!   s      r   r   FeatherDataset.validate_schemasF   sI    {{!!%,,// 2$fUKK%*\\34 4 0r   c                 :    U R                  US9R                  US9$ )aA  
Read multiple Parquet files as a single pandas DataFrame

Parameters
----------
columns : List[str]
    Names of columns to read from the file
use_threads : bool, default True
    Use multiple threads when converting to pandas

Returns
-------
pandas.DataFrame
    Content of the file as a pandas DataFrame (of columns)
r   )use_threadsr   	to_pandas)r   r   r*   s      r   read_pandasFeatherDataset.read_pandasM   s*      w/99# : % 	%r   )r   r   r   r   )Tr   )NT)
__name__
__module____qualname____firstlineno____doc__r   r   r   r-   __static_attributes__ r   r   r   r      s    	/+24%r   r   c                 $   UR                   S:X  a  g UR                  [        R                  " 5       [        R                  " 5       4;   a  [        SR                  U 5      5      e[        SR                  U [        UR                  5      5      5      e)Nr   zqColumn '{}' exceeds 2GB maximum capacity of a Feather binary column. This restriction may be lifted in the futurezkColumn '{}' of type {} was chunked on conversion to Arrow and cannot be currently written to Feather format)
num_chunkstypeextbinarystringr%   r&   str)namecols     r   check_chunked_overflowr?   a   ss    
~~
xxCJJL#**,// 006t> 	>
  **0&s388}*EG 	Gr   >   lz4zstduncompressedc           	         [         R                  (       aN  [         R                  (       a9  [        U [         R                  R
                  5      (       a  U R                  5       n [         R                  " U 5      (       ao  US:X  a  SnOUS:X  a  SnO[        S5      e[        R                  " XS9nUS:X  a7  [        UR                  R                  5       H  u  pXx   n
[        X5        M     OU nUS:X  a]  [        UR                   5      [        [#        UR                   5      5      :  a  [        S5      eUb  [        S5      eUb  [        S	5      eOMUc  [$        R&                  " S
5      (       a  SnO,Ub)  U[(        ;  a  [        SR+                  U[(        5      5      e [,        R.                  " XqUUXES9  g! [0         aI    [        U[2        5      (       a2   [4        R6                  " U5        e ! [4        R8                   a     e f = fe f = f)aF  
Write a pandas.DataFrame to Feather format.

Parameters
----------
df : pandas.DataFrame or pyarrow.Table
    Data to write out as Feather format.
dest : str
    Local destination path.
compression : string, default None
    Can be one of {"zstd", "lz4", "uncompressed"}. The default of None uses
    LZ4 for V2 files if it is available, otherwise uncompressed.
compression_level : int, default None
    Use a compression level particular to the chosen compressor. If None
    use the default compression level
chunksize : int, default None
    For V2 files, the internal maximum size of Arrow RecordBatch chunks
    when writing the Arrow IPC file format. None means use the default,
    which is currently 64K
version : int, default 2
    Feather file version. Version 2 is the current. Version 1 is the more
    limited legacy format
r   F   Nz%Version value should either be 1 or 2)preserve_indexz'cannot serialize duplicate column namesz2Feather V1 files do not support compression optionz0Feather V1 files do not support chunksize option	lz4_framer@   z1compression="{}" not supported, must be one of {})compressioncompression_level	chunksizeversion)r   have_pandas
has_sparse
isinstancepdSparseDataFrameto_denseis_data_framer%   r   from_pandas	enumerater   namesr?   lencolumn_namessetr   is_available_FEATHER_SUPPORTED_CODECSr&   r   write_feather	Exceptionr<   osremoveerror)dfdestrG   rH   rI   rJ   rE   r!   ir=   r>   s              r   rZ   rZ   s   s   2 ""2{~~==>>B  $$ a<"N\!NDEE!!"Da<$U\\%7%78h&t1 9 !|u!!"SU-?-?)@%AAFGG" & ' '   & ' ' ! 5#5#5k#B#BK%!:: ))/0I*KL L
u1B)2	E  dC  		$ 	 88 s0   !F9 9HG0/H0HHHHc                 <    [        XUUS9R                  " SSU0UD6$ )aN  
Read a pandas.DataFrame from Feather format. To read as pyarrow.Table use
feather.read_table.

Parameters
----------
source : str file path, or file-like object
    You can use MemoryMappedFile as source, for explicitly use memory map.
columns : sequence, optional
    Only read a specific set of columns. If not provided, all columns are
    read.
use_threads : bool, default True
    Whether to parallelize reading using multiple threads. If false the
    restriction is used in the conversion to Pandas as well as in the
    reading from Feather format.
memory_map : boolean, default False
    Use memory mapping when opening file on disk, when source is a str.
**kwargs
    Additional keyword arguments passed on to `pyarrow.Table.to_pandas`.

Returns
-------
df : pandas.DataFrame
    The contents of the Feather file as a pandas.DataFrame
)r   
memory_mapr*   r*   r5   r+   )sourcer   r*   rc   kwargss        r   read_featherrf      s<    6 J!!*+ N7BNFLN Or   c                 4   [         R                  " XUS9nUc  UR                  5       $ U Vs/ s H  n[        U5      PM     nn[	        [        S U5      5      (       a  UR                  U5      nOb[	        [        S U5      5      (       a  UR                  U5      nO5U Vs/ s H  oR                  PM     n	n[        SR                  X5      5      eUR                  S:  a  U$ [        [        U5      5      U:X  a  U$ UR                  U5      $ s  snf s  snf )aO  
Read a pyarrow.Table from Feather format

Parameters
----------
source : str file path, or file-like object
    You can use MemoryMappedFile as source, for explicitly use memory map.
columns : sequence, optional
    Only read a specific set of columns. If not provided, all columns are
    read.
memory_map : boolean, default False
    Use memory mapping when opening file on disk, when source is a str
use_threads : bool, default True
    Whether to parallelize reading using multiple threads.

Returns
-------
table : pyarrow.Table
    The contents of the Feather file as a pyarrow.Table
)use_memory_mapr*   c                     U [         :H  $ r   )intts    r   <lambda>read_table.<locals>.<lambda>  s    cr   c                     U [         :H  $ r   )r<   rk   s    r   rm   rn     s    18r   z<Columns must be indices or names. Got columns {} of types {}   )r   FeatherReaderreadr8   allmapread_indices
read_namesr/   	TypeErrorr&   rJ   sortedrW   select)
rd   r   rc   r*   readercolumncolumn_typesr!   rl   column_type_namess
             r   r   r      s   * ##{DF {{}/67wVDLwL7
3!<011##G,	S#\2	3	3!!'*1=>AZZ> 5;= 	=
 ~~	G		( ||G$$' 8 ?s   D"D)NNNrD   )NTF)NFT)r\   pyarrow.pandas_compatr   pyarrow.libr   r   r   r   libr9   pyarrowr   pyarrow._featherr	   r   r?   rY   rZ   rf   r   r5   r   r   <module>r      s`   & 
 -0 0   )A% A%HG <  AE*+Qh 48!O@.%r   