
    +gd                        d dl Z d dlZd dlmZ d dlmZmZmZ ddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$  e e%          Z&erdd	l'm(Z( dd
eee)e*f                  fdZ+ddZ,dS )    N)wraps)TYPE_CHECKINGOptionalUnion   )	xbasenamexdirname	xet_parsexexistsxgetsizexglob
xgzip_openxisdirxisfilexjoinxlistdirxnumpy_loadxopenxpandas_read_csvxpandas_read_excelxPathxrelpathxsio_loadmatxsplit	xsplitextxwalkxxml_dom_minidom_parse)
get_logger)patch_submodule)get_importsDatasetBuilderuse_auth_tokenc                 z   t          j        |           }t          |d          r	|j        rdS fd}t	          |d |t
                                                               t	          |d |t                                                               t	          |d |t                                                               t	          |d |t                                                               t	          |dt                                                     t	          |d	t                                                     t	          |d
t                                                     t	          |dt                                                     t	          |dt                                                     t	          |dt                                                     t	          |d |t                                                                t	          |d |t"                                                               t	          |d |t$                                                               t	          |d |t&                                                               t	          |dt(                                                     t	          |d |t*                                                               t	          |d |t,                                                               t	          |d |t.                    dg                                           t	          |d |t0                    dg                                           t	          |d |t2                    dg                                           t	          |d |t4                                                               t	          |d |t6                                                               d|_        dS )a  Extend the module to support streaming.

    We patch some functions in the module to use `fsspec` to support data streaming:
    - We use `fsspec.open` to open and read remote files. We patch the module function:
      - `open`
    - We use the "::" hop separator to join paths and navigate remote compressed/archive files. We patch the module
      functions:
      - `os.path.join`
      - `pathlib.Path.joinpath` and `pathlib.Path.__truediv__` (called when using the "/" operator)

    The patched functions are replaced with custom functions defined to work with the
    :class:`~download.streaming_download_manager.StreamingDownloadManager`.

    Args:
        module_path: Path to the module to be extended.
        use_auth_token (``str`` or :obj:`bool`, optional): Optional string or boolean to use as Bearer token for remote files on the Datasets Hub.
            If True, or not specified, will get token from `"~/.huggingface"`.
    _patched_for_streamingNc                 N     t                      fd            }d|_        |S )Nc                       | di|S )Nr#    )argskwargsfunctionr#   s     2lib/python3.11/site-packages/datasets/streaming.pywrapperz?extend_module_for_streaming.<locals>.wrap_auth.<locals>.wrapperE   s    8TK.KFKKK    	wrap_auth)r   _decorator_name_)r+   r-   r#   s   ` r,   r/   z.extend_module_for_streaming.<locals>.wrap_authD   sG    	x	L 	L 	L 	L 	L 
	L $/ r.   openz
os.listdirzos.walkz	glob.globzos.path.joinzos.path.dirnamezos.path.basenamezos.path.relpathzos.path.splitzos.path.splitextzos.path.existszos.path.isdirzos.path.isfilezos.path.getsizezpathlib.Pathz	gzip.openz
numpy.loadzpandas.read_csv__version__)attrszpandas.read_excelzscipy.io.loadmatzxml.etree.ElementTree.parsezxml.dom.minidom.parseT)	importlibimport_modulehasattrr%   r   r   startr   r   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r
   r   )module_pathr#   moduler/   s    `  r,   extend_module_for_streamingr:   *   s   ( $[11F v/00 V5R      FFIIe$4$455;;===FL))H*=*=>>DDFFFFIyy'7'788>>@@@FK5)9)9::@@BBBFNE2288:::F-x88>>@@@F.	::@@BBBF-x88>>@@@FOV44::<<<F.	::@@BBBF,ii.@.@AAGGIIIFOYYv->->??EEGGGF,ii.@.@AAGGIIIF-yy/B/BCCIIKKKFNE2288:::FK:)>)>??EEGGGFL))K*@*@AAGGIIIF-yy9I/J/JS`RabbbhhjjjF/;M1N1NWdVefffllnnnF.		,0G0GP]___eegggF999Y;O;OPPVVXXXF3YY?U5V5VWW]]___$(F!!!r.   builderr"   c                    t          | j        | j                   | j                            d          st	          t          j        | j                            D ]e}|d         dk    rW|d         }d                    | j        	                    d          dd         |gz             }t          || j                   fdd	l
m fd
t          |           j        dd         D             }|D ]}t          || j                   dS )zExtend the dataset builder module and the modules imported by it to support streaming.

    Args:
        builder (:class:`DatasetBuilder`): Dataset builder instance.
    )r#   z	datasets.r   internalr   .Nr!   c                 ^    g | ])}t          |          r|j        j        k    "|j        *S r(   )
issubclass
__module__).0clsr"   s     r,   
<listcomp>z8extend_dataset_builder_for_streaming.<locals>.<listcomp>~   sI       c>** 03~AZ/Z/Z 	/Z/Z/Zr.   )r:   rB   r#   
startswithr    inspectgetfile	__class__joinsplitr;   r"   type__mro__)r;   importsinternal_import_nameinternal_module_nameparent_builder_modulesr9   r"   s         @r,   $extend_dataset_builder_for_streamingrR   i   sL      27CYZZZZ((55 i"7?73D#E#EFF 	i 	iGqzZ'''.qz$'*xx0B0H0H0M0Mcrc0RVjUk0k'l'l$+,@QXQghhhh
 ('''''   ==(,  
 ) S S#F7;QRRRRRS Sr.   )N)r;   r"   )-r4   rG   	functoolsr   typingr   r   r   #download.streaming_download_managerr   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   utils.loggingr   utils.patchingr   utils.py_utilsr    __name__loggerr;   r"   strboolr:   rR   r(   r.   r,   <module>r]      s              1 1 1 1 1 1 1 1 1 1                                               0 & % % % % % + + + + + + ' ' ' ' ' ' 
H		  (''''''<) <)XeCQUIFV=W <) <) <) <)~S S S S S Sr.   