
    +gd                         d dl Z d dlmZmZmZ d dlmZ ddlm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZ d
dlmZ  G d de          Z G d d          ZdS )    N)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)query_table)_PACKAGED_DATASETS_MODULES)Parquet)logging)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                        e Zd Z	 	 	 	 	 	 ddee         dee         dee         dede	de	d	ee
         f fd
Zd Z xZS )ParquetDatasetReaderNFpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingnum_procc           
           t                      j        |f||||||d| t          |t                    r|n| j        |i}t
          d         d         }	t          d||||	d|| _        d S )N)r   r   r   r   r   r   parquetr   )r   
data_filesr   hash )super__init__
isinstancedictr   r   r   builder)selfr   r   r   r   r   r   r   kwargsr   	__class__s             3lib/python3.11/site-packages/datasets/io/parquet.pyr"   zParquetDatasetReader.__init__   s     			
)		
 		
 		
 		
 		
 *4M4)H)Hitz[hNi))4Q7 
$	
 

 
 
    c                     | j         r!| j                            | j                  }nSd }d }d }d }| j                            ||||| j                   | j                            | j        || j                  }|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   r.   	in_memory)r   r%   as_streaming_datasetr   download_and_preparer   
as_datasetr   )r&   datasetr,   r-   r.   r/   s         r)   readzParquetDatasetReader.read/   s    > 	l77dj7IIGG #O M $IL-- /+"3# .    l--j4EQUQd .  G r*   )NNNFFN)__name__
__module____qualname__r   r   r   r	   r   strboolintr"   r5   __classcell__)r(   s   @r)   r   r      s         '+'+$"&
 
.x8
 
#
 8$	

 
 
 
 3-
 
 
 
 
 
>      r*   r   c                   b    e Zd Z	 d
dedeeef         dee         fdZ	defdZ
dededefd	ZdS )ParquetDatasetWriterNr4   path_or_buf
batch_sizec                 >    || _         || _        || _        || _        d S N)r4   r?   r@   parquet_writer_kwargs)r&   r4   r?   r@   rC   s        r)   r"   zParquetDatasetWriter.__init__I   s(     &$%:"""r*   returnc                 L   | j         r| j         nt          j        }t          | j        t
          t          t          j        f          rCt          | j        d          5 } | j
        d||d| j        }d d d            n# 1 swxY w Y   n | j
        d| j        |d| j        }|S )Nzwb+)file_objr@   r    )r@   r
   DEFAULT_MAX_BATCH_SIZEr#   r?   r9   bytesosr   open_writerC   )r&   r@   bufferwrittens       r)   writezParquetDatasetWriter.writeU   s    (,ZT__V=Z
d&eR[(ABB 	rd&.. l&%$+kv*kkPTPjkkl l l l l l l l l l l l l l l "dkq4+;
qqVZVpqqGs   A>>BBrF   c           	         d}|                     dd          }| j        j        j        }t	          j        |fd|i|}t          j        t          dt          | j                  |          dt          j
                     d          D ]f}t          | j        j        t          |||z             | j        j        | j        j        nd          }	|                    |	           ||	j        z  }g|                                 |S )	zWrites the pyarrow table as Parquet to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   r?   Nschemabaz"Creating parquet from Arrow format)unitdisabledesc)tablekeyindices)popr4   r   arrow_schemapqParquetWriterr   tqdmrangelenis_progress_bar_enabledr   _dataslice_indiceswrite_tablenbytesclose)
r&   rF   r@   rC   rM   _rP   writeroffsetbatchs
             r)   rK   zParquetDatasetWriter._write_   s   
 !%%mT::&3!(SS6S=RSSl!S&&
3379995	
 
 
 	$ 	$F  l(&&:"566151F1R--X\  E
 u%%%u|#GGr*   rB   )r6   r7   r8   r   r   r   r   r   r;   r"   rN   rK   r    r*   r)   r>   r>   H   s        
 %)	
; 
;
; 8X-.
; SM	
; 
; 
; 
;s    x S VY      r*   r>   )rI   typingr   r   r   pyarrow.parquetr   rZ    r   r   r	   r
   
formattingr   packaged_modulesr    packaged_modules.parquet.parquetr   utilsr   utils.typingr   r   abcr   r   r>   r    r*   r)   <module>rs      s2   				 , , , , , , , , , ,       4 4 4 4 4 4 4 4 4 4 4 4 $ $ $ $ $ $ 9 9 9 9 9 9 6 6 6 6 6 6       < < < < < < < < & & & & & &6 6 6 6 60 6 6 6r0 0 0 0 0 0 0 0 0 0r*   