
    +gd,[                     L   d Z ddlZddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZ ddlmZmZ ddlmZ ddlmZmZ e G d	 d
                      Ze G d d                      Z G d dej                  Z G d de          Z G d de          ZeZ G d de          Z G d de          Z G d de          Z G d de          Z G d d          Z  ej!        ddd g          Z" G d! d"          Z# G d# d$e$          Z%e G d% d&                      Z&dS )'zSplits related API.    N)	dataclass)DictListOptionalUnion   )FileInstructionsmake_file_instructions)	_split_re)NonMutableDictasdictc                       e Zd ZU dZeed<   dZeed<   dZeed<   dZ	e
ee                  ed<    ej        ddd	i
          Ze
e         ed<   ed             ZdS )	SplitInfo namer   	num_bytesnum_examplesNshard_lengths$include_in_asdict_even_if_is_defaultT)defaultmetadatadataset_namec                 d    t          | j        | gt          | j                            }|j        S )/Returns the list of dict(filename, take, skip).r   split_infosinstruction)r
   r   strr   file_instructions)selfinstructionss     /lib/python3.11/site-packages/datasets/splits.pyr   zSplitInfo.file_instructions/   s9     ."DI
 
 

 --    )__name__
__module____qualname__r   r   __annotations__r   intr   r   r   r   dataclassesfieldr   propertyr    r#   r"   r   r       s         D#NNNIsL#)-M8DI&--- #4+"3 FM# # #L(3-    . . X. . .r#   r   c                   J    e Zd ZU dZeed<   ed             Zed             ZdS )SubSplitInfozWrapper around a sub split info.
    This class expose info on the subsplit:
    ```
    ds, info = datasets.load_dataset(..., split='train[75%:]', with_info=True)
    info.splits['train[75%:]'].num_examples
    ```
    r!   c                     | j         j        S )z.Returns the number of example in the subsplit.)r!   r   r    s    r"   r   zSubSplitInfo.num_examplesG   s      --r#   c                     | j         j        S )r   )r!   r   r0   s    r"   r   zSubSplitInfo.file_instructionsL   s      22r#   N)	r$   r%   r&   __doc__r	   r'   r+   r   r   r,   r#   r"   r.   r.   ;   sb           #""". . X. 3 3 X3 3 3r#   r.   c                   L    e Zd ZdZej        d             Zd Zd Zd Z	ddZ
dS )		SplitBasea  Abstract base class for Split compositionality.

    See the
    [guide on splits](../loading#slice-splits)
    for more information.

    There are three parts to the composition:
        1) The splits are composed (defined, merged, split,...) together before
             calling the `.as_dataset()` function. This is done with the `__add__`,
             `__getitem__`, which return a tree of `SplitBase` (whose leaf
             are the `NamedSplit` objects)

        ```
        split = datasets.Split.TRAIN + datasets.Split.TEST.subsplit(datasets.percent[:50])
        ```

        2) The `SplitBase` is forwarded to the `.as_dataset()` function
             to be resolved into actual read instruction. This is done by the
             `.get_read_instruction()` method which takes the real dataset splits
             (name, number of shards,...) and parse the tree to return a
             `SplitReadInstruction()` object

        ```
        read_instruction = split.get_read_instruction(self.info.splits)
        ```

        3) The `SplitReadInstruction` is then used in the `tf.data.Dataset` pipeline
             to define which files to read and how to skip examples within file.

    c                      t          d          )zParse the descriptor tree and compile all read instructions together.

        Args:
            split_dict: `dict`, The `dict[split_name, SplitInfo]` of the dataset

        Returns:
            split_read_instruction: `SplitReadInstruction`
        zAbstract method)NotImplementedErrorr    
split_dicts     r"   get_read_instructionzSplitBase.get_read_instructiont   s     ""3444r#   c                 \    t          |t          t          f          rdS t          d          )*Equality: datasets.Split.TRAIN == 'train'.Fz6Equality is not implemented between merged/sub splits.)
isinstance
NamedSplitr   r6   r    others     r"   __eq__zSplitBase.__eq__   s-    ej#.// 	5!"Z[[[r#   c                 .    |                      |           S )z+InEquality: datasets.Split.TRAIN != 'test'.)r@   r>   s     r"   __ne__zSplitBase.__ne__   s    ;;u%%%%r#   c                 "    t          | |          S )z4Merging: datasets.Split.TRAIN + datasets.Split.TEST.)_SplitMergedr>   s     r"   __add__zSplitBase.__add__   s    D%(((r#   Nc                    
 t          d ||||fD                       dk    rt          d          t          |t                    r|}n/t          |t                    r|}nt          |t
                    r|}|s|s|st          d| d          d }|rd|cxk     rdk    sn t          d	|           d|z  

fd
t          |          D             }t	          |d         j        d          |d<    ||           t           fd|D                       S |rt           |          S |rt          |          fd|D             }d}d}g }|D ],}	||	z  }|
                    t	          ||                     |}-t	          |d         j        d          |d<    ||           t           fd|D                       S t          d          )a6  Divides this split into subsplits.

        There are 3 ways to define subsplits, which correspond to the 3
        arguments `k` (get `k` even subsplits), `percent` (get a slice of the
        dataset with `datasets.percent`), and `weighted` (get subsplits with proportions
        specified by `weighted`).

        Example::

        ```
        # 50% train, 50% test
        train, test = split.subsplit(k=2)
        # 50% train, 25% test, 25% validation
        train, test, validation = split.subsplit(weighted=[2, 1, 1])
        # Extract last 20%
        subsplit = split.subsplit(datasets.percent[-20:])
        ```

        Warning: k and weighted will be converted into percent which mean that
        values below the percent will be rounded up or down. The final split may be
        bigger to deal with remainders. For instance:

        ```
        train, test, valid = split.subsplit(k=3)  # 33%, 33%, 34%
        s1, s2, s3, s4 = split.subsplit(weighted=[2, 2, 1, 1])  # 33%, 33%, 16%, 18%
        ```

        Args:
            arg: If no kwargs are given, `arg` will be interpreted as one of
                `k`, `percent`, or `weighted` depending on the type.
                For example:
                ```
                split.subsplit(10)  # Equivalent to split.subsplit(k=10)
                split.subsplit(datasets.percent[:-20])  # percent=datasets.percent[:-20]
                split.subsplit([1, 1, 2])  # weighted=[1, 1, 2]
                ```
            k: `int` If set, subdivide the split into `k` equal parts.
            percent: `datasets.percent slice`, return a single subsplit corresponding to
                a slice of the original split. For example:
                `split.subsplit(datasets.percent[-20:])  # Last 20% of the dataset`.
            weighted: `list[int]`, return a list of subsplits whose proportions match
                the normalized sum of the list. For example:
                `split.subsplit(weighted=[1, 1, 2])  # 25%, 25%, 50%`.

        Returns:
            A subsplit or list of subsplits extracted from this split object.
        c              3   4   K   | ]}t          |          V  d S N)bool).0xs     r"   	<genexpr>z%SplitBase.subsplit.<locals>.<genexpr>   s(      <<1tAww<<<<<<r#   r   z,Only one argument of subsplit should be set.zInvalid split argument zg. Only list, slice and int supported. One of k, weighted or percent should be set to a non empty value.c                 z    t          d | D             g           t          t          d                    k    sJ d S )Nc              3   h   K   | ]-}t          t          |                    d                      V  .dS )d   N)listrangeindicesrJ   ss     r"   rL   zESplitBase.subsplit.<locals>.assert_slices_coverage.<locals>.<genexpr>   s9      EEUAIIcNN344EEEEEEr#   rO   )sumrP   rQ   )slicess    r"   assert_slices_coveragez2SplitBase.subsplit.<locals>.assert_slices_coverage   sD    EEfEEErJJdSXY\S]S]N^N^^^^^^^r#   r   rO   z,Subsplit k should be between 0 and 100, got c                 B    g | ]}t          |z  |d z   z            S )r   )slice)rJ   ishifts     r"   
<listcomp>z&SplitBase.subsplit.<locals>.<listcomp>   s/    JJJAeAIA77JJJr#   c              3   8   K   | ]}t          |          V  d S rH   	_SubSplitrJ   rT   r    s     r"   rL   z%SplitBase.subsplit.<locals>.<genexpr>   -      <<4++<<<<<<r#   c                      g | ]
}d |z  z  S )rO   r,   )rJ   rK   totals     r"   r\   z&SplitBase.subsplit.<locals>.<listcomp>   s"    ;;;Qa5(;;;r#   c              3   8   K   | ]}t          |          V  d S rH   r_   ra   s     r"   rL   z%SplitBase.subsplit.<locals>.<genexpr>   rb   r#   zCould not determine the split)rU   
ValueErrorr<   r(   rY   rP   rQ   starttupler`   append)r    argkpercentweightedrW   rV   rg   stopvr[   rd   s   `         @@r"   subsplitzSplitBase.subsplit   s}   d <<a( ;<<<<<AAKLLL c3 	AAU## 	GGT"" 	H 	W 	 	T# T T T  
	_ 	_ 	_  	>q<<<<C<<<< !SPQ!S!STTT1HEJJJJqJJJFvbz/55F2J""6***<<<<V<<<<<< 	>T7+++ 	>MME;;;;(;;;HEDF  	eE400111vbz/55F2J""6***<<<<V<<<<<< <===r#   )NNNN)r$   r%   r&   r2   abcabstractmethodr9   r@   rB   rE   rp   r,   r#   r"   r4   r4   R   s         @ 		5 	5 	5\ \ \& & &) ) )f> f> f> f> f> f>r#   r4   )	metaclassc                       e Zd Zd ZdS )PercentSliceMetac                 T    t          |t                    st          d|           |S )Nz7datasets.percent should only be called with slice, not )r<   rY   rf   )clsslice_values     r"   __getitem__zPercentSliceMeta.__getitem__   s3    +u-- 	fdWbddeeer#   N)r$   r%   r&   ry   r,   r#   r"   ru   ru      s#            r#   ru   c                       e Zd ZdZdS )PercentSlicezSyntactic sugar for defining slice subsplits: `datasets.percent[75:-5]`.

    See the
    [guide on splits](../loading#slice-splits)
    for more information.
    N)r$   r%   r&   r2   r,   r#   r"   r{   r{     s          	Dr#   r{   c                   $    e Zd ZdZd Zd Zd ZdS )rD   z0Represent two split descriptors merged together.c                 "    || _         || _        d S rH   )_split1_split2)r    split1split2s      r"   __init__z_SplitMerged.__init__  s    r#   c                 t    | j                             |          }| j                            |          }||z   S rH   )r~   r9   r   )r    r8   read_instruction1read_instruction2s       r"   r9   z!_SplitMerged.get_read_instruction  s:     L==jII L==jII #444r#   c                 \    dt          | j                   dt          | j                   dS )N(z + ))reprr~   r   r0   s    r"   __repr__z_SplitMerged.__repr__  s/    ?4%%??$t|*<*<????r#   Nr$   r%   r&   r2   r   r9   r   r,   r#   r"   rD   rD     sL        ::  5 5 5
@ @ @ @ @r#   rD   c                   $    e Zd ZdZd Zd Zd ZdS )r`   z,Represent a sub split of a split descriptor.c                 "    || _         || _        d S rH   )_split_slice_value)r    splitrx   s      r"   r   z_SubSplit.__init__&  s    'r#   c                 L    | j                             |          | j                 S rH   )r   r9   r   r7   s     r"   r9   z_SubSplit.get_read_instruction*  s     {//
;;D<MNNr#   c                     d}| j         j        |dz  }|                    | j         j        dn| j         j        | j         j        dn| j         j        | j         j                  }t          | j                   d| dS )Nz{start}:{stop}z:{step}r   )rg   rn   stepz(datasets.percent[z]))r   r   formatrg   rn   r   r   )r    	slice_strs     r"   r   z_SubSplit.__repr__-  s    $	!-"I$$)/7""T=N=T(-54;L;Q"' % 
 
	
 t{##DDyDDDDr#   Nr   r,   r#   r"   r`   r`   #  sO        66( ( (O O O	E 	E 	E 	E 	Er#   r`   c                   <    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	S )
r=   a  Descriptor corresponding to a named split (train, test, ...).

    Example:
        Each descriptor can be composed with other using addition or slice:

            ```py
            split = datasets.Split.TRAIN.subsplit(datasets.percent[0:25]) + datasets.Split.TEST
            ```

        The resulting split will correspond to 25% of the train split merged with
        100% of the test split.

        A split cannot be added twice, so the following will fail:

            ```py
            split = (
                    datasets.Split.TRAIN.subsplit(datasets.percent[:25]) +
                    datasets.Split.TRAIN.subsplit(datasets.percent[75:])
            )  # Error
            split = datasets.Split.TEST + datasets.Split.ALL  # Error
            ```

        The slices can be applied only one time. So the following are valid:

            ```py
            split = (
                    datasets.Split.TRAIN.subsplit(datasets.percent[:25]) +
                    datasets.Split.TEST.subsplit(datasets.percent[:50])
            )
            split = (datasets.Split.TRAIN + datasets.Split.TEST).subsplit(datasets.percent[:50])
            ```

        But this is not valid:

            ```py
            train = datasets.Split.TRAIN
            test = datasets.Split.TEST
            split = train.subsplit(datasets.percent[:25]).subsplit(datasets.percent[:25])
            split = (train.subsplit(datasets.percent[:25]) + test).subsplit(datasets.percent[:50])
            ```
    c                     || _         d |                    d          D             }|D ]7}t          j        t          |          st          dt           d| d          8d S )Nc                 D    g | ]}|                     d           d         S )[r   )r   )rJ   split_instructions     r"   r\   z'NamedSplit.__init__.<locals>.<listcomp>f  s-    'q'q'qL](9(?(?(D(DQ(G'q'q'qr#   +zSplit name should match 'z' but got 'z'.)_namer   rematchr   rf   )r    r   split_names_from_instruction
split_names       r"   r   zNamedSplit.__init__d  s    
'q'qaeakakloapap'q'q'q$6 	c 	cJ8Iz22 c !aY!a!aS]!a!a!abbbc	c 	cr#   c                     | j         S rH   r   r0   s    r"   __str__zNamedSplit.__str__k  s
    zr#   c                     d| j         dS )NzNamedSplit(r   r   r0   s    r"   r   zNamedSplit.__repr__n  s    ,TZ,,,,r#   c                     t          |t                    r| j        |j        k    S t          |t                    rdS t          |t                    r| j        |k    S t          d|  d|           )r;   Fz%Equality not supported between split z and )r<   r=   r   r4   r   rf   r>   s     r"   r@   zNamedSplit.__eq__q  s|    eZ(( 	Y:,,y)) 	Y5s## 	Y:&&WTWWPUWWXXXr#   c                 "    | j         |j         k     S rH   r   r>   s     r"   __lt__zNamedSplit.__lt__|  s    zEK''r#   c                 *    t          | j                  S rH   )hashr   r0   s    r"   __hash__zNamedSplit.__hash__  s    DJr#   c                 6    t          || j                           S rH   )SplitReadInstructionr   r7   s     r"   r9   zNamedSplit.get_read_instruction  s    #Jtz$:;;;r#   N)r$   r%   r&   r2   r   r   r   r@   r   r   r9   r,   r#   r"   r=   r=   9  s        ( (Tc c c  - - -	Y 	Y 	Y( ( (     < < < < <r#   r=   c                   .     e Zd ZdZ fdZd Zd Z xZS )NamedSplitAllz?Split corresponding to the union of all defined dataset splits.c                 J    t                                          d           d S )Nall)superr   )r    	__class__s    r"   r   zNamedSplitAll.__init__  s!    r#   c                     dS )NzNamedSplitAll()r,   r0   s    r"   r   zNamedSplitAll.__repr__  s      r#   c                 v    d |                                 D             }t          |t                                S )Nc                 ,    g | ]}t          |          S r,   )r   rS   s     r"   r\   z6NamedSplitAll.get_read_instruction.<locals>.<listcomp>  s!    RRR1!44RRRr#   )valuesrU   r   )r    r8   read_instructionss      r"   r9   z"NamedSplitAll.get_read_instruction  s:    RRj>O>O>Q>QRRR$&:&<&<===r#   )r$   r%   r&   r2   r   r   r9   __classcell__r   s   @r"   r   r     s\        II         ! ! !> > > > > > >r#   r   c                   n    e Zd ZdZ ed          Z ed          Z ed          Z e            Z	d Z
dS )Splita"  `Enum` for dataset splits.

    Datasets are typically split into different subsets to be used at various
    stages of training and evaluation.

    - `TRAIN`: the training data.
    - `VALIDATION`: the validation data. If present, this is typically used as
      evaluation data while iterating on a model (e.g. changing hyperparameters,
      model architecture, etc.).
    - `TEST`: the testing data. This is the data to report metrics on. Typically
      you do not want to use this during model iteration as you may overfit to it.
    - `ALL`: the union of all defined dataset splits.

    All splits, including compositions inherit from `datasets.SplitBase`.

    See the [guide](../load_hub#splits) on splits for more information.

    Example:

    ```py
    >>> datasets.SplitGenerator(
    ...     name=datasets.Split.TRAIN,
    ...     gen_kwargs={"split_key": "train", "files": dl_manager.download_and extract(url)},
    ... ),
    ... datasets.SplitGenerator(
    ...     name=datasets.Split.VALIDATION,
    ...     gen_kwargs={"split_key": "validation", "files": dl_manager.download_and extract(url)},
    ... ),
    ... datasets.SplitGenerator(
    ...     name=datasets.Split.TEST,
    ...     gen_kwargs={"split_key": "test", "files": dl_manager.download_and extract(url)},
    ... )
    ```
    traintest
validationc                 H    |dk    rt                      nt          |          S )z9Create a custom split with datasets.Split('custom_name').r   )r   r=   )rw   r   s     r"   __new__zSplit.__new__  s     "&%--}Z5E5EEr#   N)r$   r%   r&   r2   r=   TRAINTEST
VALIDATIONr   ALLr   r,   r#   r"   r   r     sm        ! !F JwE:fDL))J
-//CF F F F Fr#   r   SlicedSplitInfo
split_inforx   c                   2    e Zd ZdZddZd Zd Zd Zd ZdS )	r   a  Object containing the reading instruction for the dataset.

    Similarly to `SplitDescriptor` nodes, this object can be composed with itself,
    but the resolution happens instantaneously, instead of keeping track of the
    tree, such as all instructions are compiled and flattened in a single
    SplitReadInstruction object containing the list of files and slice to use.

    Once resolved, the instructions can be accessed with:

    ```
    read_instructions.get_list_sliced_split_info()  # List of splits to use
    ```

    Nc                     t          d          | _        |r&|                     t          |d                      d S d S )Nz?Overlap between splits. Split {key} has been added with itself.)	error_msg)r   rx   )r   _splitsaddr   )r    r   s     r"   r   zSplitReadInstruction.__init__  sO    %0tuuu 	OHH_
MMMNNNNN	O 	Or#   c                 .    || j         |j        j        <   dS )z,Add a SlicedSplitInfo the read instructions.N)r   r   r   )r    sliced_splits     r"   r   zSplitReadInstruction.add  s    
 6B\,1222r#   c                     t                      }|j                            | j                   |j                            |j                   |S )zMerging split together.)r   r   update)r    r?   r   s      r"   rE   zSplitReadInstruction.__add__  sH    
 122!((666!((777  r#   c                    t                      }| j                                        D ]^}|j        t	          d|j        j         d          |                                }||d<   |                    t          di |           _|S )zSub-splits.NzTrying to slice Split z which has already been slicedrx   r,   )
r   r   r   rx   rf   r   r   _asdictr   r   )r    rx   r   ro   s       r"   ry   z SplitReadInstruction.__getitem__  s     122$$&& 	8 	8A}( !k!,:K!k!k!klll		A*Am!!/"6"6A"6"67777  r#   c                 N    t          | j                                                  S rH   )rP   r   r   r0   s    r"   get_list_sliced_split_infoz/SplitReadInstruction.get_list_sliced_split_info  s    DL''))***r#   rH   )	r$   r%   r&   r2   r   r   rE   ry   r   r,   r#   r"   r   r     su         O O O OB B B! ! !
! 
! 
!+ + + + +r#   r   c                       e Zd ZdZdd fd
Zdeeef         f fdZdeeef         de	f fdZ
d	e	f fd
Zed             Zeddeeef         dee         fd            Zd Zd ZdefdZededd fd            Z xZS )	SplitDictzSplit info object.Nr   c                H     t                      j        |i | || _        d S rH   )r   r   r   )r    r   argskwargsr   s       r"   r   zSplitDict.__init__  s-    $)&)))(r#   keyc                     t          |          | v r.t                                          t          |                    S t          | j        |                                 |          }t          |          S )Nr   )r   r   ry   r
   r   r   r.   )r    r   r!   r   s      r"   ry   zSplitDict.__getitem__  sl    s88t77&&s3xx000 2& KKMM  L
  ---r#   valuec                     ||j         k    rt          d| d|j          d          || v rt          d| d          t                                          ||           d S )Nz!Cannot add elem. (key mismatch: 'z' != 'z')Split  already present)r   rf   r   __setitem__)r    r   r   r   s      r"   r   zSplitDict.__setitem__  sw    %*ZZZEJZZZ[[[$;;;c;;;<<<C'''''r#   r   c                     |j         | v rt          d|j          d          | j        |_        t                                          |j         |           dS )zAdd the split info.r   r   N)r   rf   r   r   r   )r    r   r   s     r"   r   zSplitDict.add   sY    ?d""GjoGGGHHH"&"3
JOZ88888r#   c                 X    t          d |                                 D                       S )z$Return the total number of examples.c              3   $   K   | ]}|j         V  d S rH   )r   rS   s     r"   rL   z/SplitDict.total_num_examples.<locals>.<genexpr>*  s$      99a1>999999r#   )rU   r   r0   s    r"   total_num_exampleszSplitDict.total_num_examples'  s)     994;;==999999r#   r   r   c                 B   t          |t                    r!t          |                                          }||r|d                             d          nd} | |          }|D ]8}t          |t                    rt          di |}|                    |           9|S )zIReturns a new SplitDict initialized from a Dict or List of `split_infos`.Nr   r   r   r,   )r<   dictrP   r   getr   r   )rw   r   r   r8   r   s        r"   from_split_dictzSplitDict.from_split_dict,  s     k4(( 	5{113344KALV;q>--n===RVLSl333
% 	' 	'J*d++ 5&4444
NN:&&&&r#   c                     g }|                                  D ]5\  }}t          j        |          }||_        |                    |           6|S )z0Returns a list of SplitInfo protos that we have.)itemscopydeepcopyr   ri   )r    outr   r   s       r"   to_split_dictzSplitDict.to_split_dict>  sS    &*jjll 	# 	#"J
z22J(JOJJz""""
r#   c                 f    t                               |                                 | j                  S rH   )r   r   r   r   r0   s    r"   r   zSplitDict.copyG  s'    ((););)=)=t?PQQQr#   returnc                     d |                                  D             }|D ]}|                    dd            |D ]}|                    dd            |S )Nc                 ,    g | ]}t          |          S r,   )r   rS   s     r"   r\   z+SplitDict._to_yaml_list.<locals>.<listcomp>K  s    777Qvayy777r#   r   r   )r   pop)r    r   split_info_dicts      r"   _to_yaml_listzSplitDict._to_yaml_listJ  sv    77$"4"4"6"6777" 	7 	7O6666" 	6 	6O5555
r#   	yaml_datac                 ,    |                      |          S rH   )r   )rw   r   s     r"   _from_yaml_listzSplitDict._from_yaml_listT  s    ""9---r#   rH   )r$   r%   r&   r2   r   r   r4   r   ry   r   r   r   r+   r   classmethodr   r   r   r   r   r   rP   r   r   r   r   s   @r"   r   r     s       +/ ) ) ) ) ) ) ).uY^4 . . . . . .(uY^4 (Y ( ( ( ( ( (9i 9 9 9 9 9 9 : : X:  %d
*; 8TW=    ["  R R Rt     . . . . . [. . . . .r#   r   c                   |    e Zd ZU dZeed<    ej        e          Z	e
ed<    ej        d          Zeed<   d Zd	S )
SplitGeneratora  Defines the split information for the generator.

    This should be used as returned value of
    `GeneratorBasedBuilder._split_generators`.
    See `GeneratorBasedBuilder._split_generators` for more info and example
    of usage.

    Args:
        name (`str`):
            Name of the `Split` for which the generator will
            create the examples.
        **gen_kwargs (additional keyword arguments):
            Keyword arguments to forward to the `DatasetBuilder._generate_examples` method
            of the builder.

    Example:

    ```py
    >>> datasets.SplitGenerator(
    ...     name=datasets.Split.TRAIN,
    ...     gen_kwargs={"split_key": "train", "files": dl_manager.download_and_extract(url)},
    ... )
    ```
    r   )default_factory
gen_kwargsF)initr   c                     t          | j                  | _        t          | j                   t          | j                  | _        d S )N)r   )r   r   r=   r   r   r0   s    r"   __post_init__zSplitGenerator.__post_init__x  s9    	NN	49#333r#   N)r$   r%   r&   r2   r   r'   r)   r*   r   r   r   r   r   r   r,   r#   r"   r   r   Y  sz          2 III({(>>>J>>>-K-5999J	9994 4 4 4 4r#   r   )'r2   rq   collectionsr   r)   r   r   typingr   r   r   r   arrow_readerr	   r
   namingr   utils.py_utilsr   r   r   r.   ABCMetar4   typeru   r{   rl   rD   r`   r=   r   r   
namedtupler   r   r   r   r   r,   r#   r"   <module>r	     sn      


          				 ! ! ! ! ! ! . . . . . . . . . . . . B B B B B B B B       2 2 2 2 2 2 2 2 . . . . . . . .4 3 3 3 3 3 3 3 3,b> b> b> b> b>#+ b> b> b> b>V    t   		 		 		 		 		- 		 		 		 		 @ @ @ @ @9 @ @ @ E E E E E	 E E E,J< J< J< J< J< J< J< J<Z> > > > >J > > >,F ,F ,F ,F ,F ,F ,F ,F` )+( 4+ 4+ 4+ 4+ 4+ 4+ 4+ 4+nQ. Q. Q. Q. Q. Q. Q. Q.h !4 !4 !4 !4 !4 !4 !4 !4 !4 !4r#   