
    +gdg                     6   d Z ddlZddlZddlZddlZddlZddlmZ ddlm	Z	 ddl
mZmZmZmZ ddlZddlmZ ddlmZ ddlmZmZ dd	lmZmZmZmZ dd
lmZ ddl m!Z! erddl"m#Z# ddl$m%Z%m&Z&  ej'        e(          Z)dZ* ej+        dedd          dej,                  Z- ej+        d          Z. G d de/          Z0 G d de/          Z1 ed           G d d                      Z2	 	 d2de3ded         dee3df         d ee3         d!ee3         f
d"Z4 G d# d$          Z5 G d% d&e5          Z6 G d' d(e5          Z7 ed           G d) d*                      Z8 ed           G d+ d,                      Z9d- Z:d. Z;d/ Z<d0 Z= G d1 d          Z>dS )3z Arrow ArrowReader.    N)	dataclass)Path)TYPE_CHECKINGListOptionalUnion   )DownloadConfig)	_split_refilenames_for_dataset_split)InMemoryTableMemoryMappedTableTableconcat_tables)logging)cached_path)DatasetInfo)Split	SplitInfoz=https://storage.googleapis.com/huggingface-nlp/cache/datasetsz
^
 (?P<split>z)
 (\[
    ((?P<from>-?\d+)
     (?P<from_pct>%)?)?
    :
    ((?P<to>-?\d+)
     (?P<to_pct>%)?)?
 \])?(\((?P<rounding>[^\)]*)\))?
$
z\s*\+\s*c                       e Zd ZdZdS )DatasetNotOnHfGcsErrorz?When you can't get the dataset from the Hf google cloud storageN__name__
__module____qualname____doc__     5lib/python3.11/site-packages/datasets/arrow_reader.pyr   r   A   s        IIDr   r   c                       e Zd ZdZdS )MissingFilesOnHfGcsErrorz9When some files are missing on the Hf oogle cloud storageNr   r   r   r    r"   r"   G   s        CCDr   r"   T)frozenc                   4    e Zd ZU dZeed<   ee         ed<   dS )FileInstructionsa}  The file instructions associated with a split ReadInstruction.

    Attributes:
        num_examples: `int`, The total number of examples
        file_instructions: List[dict(filename, skip, take)], the files information.
            The filenames contains the relative path, not absolute.
            skip/take indicates which example read in the file: `ds.slice(skip, take)`
    num_examplesfile_instructionsN)r   r   r   r   int__annotations__r   dictr   r   r    r%   r%   M   s:           Dz!!!!!r   r%   namesplit_infosr   instructionReadInstructionfiletype_suffixprefix_pathc                     d |D             }d |D              fd|D             }t          |t                    st                              |          }|                    |          }g }d}	|D ]}
||
j                 }||
j                 }|
j                 }|
j        dn|
j        }|
j        |n|
j        }|*|D ]&}|	||z
  z  }	|                    ||||z
  d           'ud}d}t          ||          D ]h\  }}||z  }||k     rS||k    rM||k    r||z
  nd}||k     r||z
  |z
  nd}|dk    r:|                    |||d           |	|dk    r||z
  n|z  }	||z  }it          |	|          S )	ap  Returns instructions of the split dict.

    Args:
        name: Name of the dataset.
        split_infos: `List[SplitInfo]`, Dataset splits information
        instruction: `ReadInstruction` or `str`
        filetype_suffix: :obj:`str`, optional suffix of dataset files, e.g. 'arrow' or 'parquet'

    Returns:
        file_intructions: FileInstructions instance
    c                 (    i | ]}|j         |j        S r   )r+   r&   .0infos     r    
<dictcomp>z*make_file_instructions.<locals>.<dictcomp>n   s    EEE	4,EEEr   c                 (    i | ]}|j         |j        S r   )r+   shard_lengthsr3   s     r    r6   z*make_file_instructions.<locals>.<dictcomp>o   s    PPPD$)T%7PPPr   c                 d    i | ],}|j         t          |j         |j                             -S ))pathdataset_namesplitr/   r8   )r+   r   )r4   r5   r/   r+   name2shard_lengthsr0   s     r    r6   z*make_file_instructions.<locals>.<dictcomp>p   sW     	 	 	  		.)+,TY7
 
 
	 	 	r   r   N)filenameskiptaker   )r&   r'   )

isinstancer.   	from_specto_absolute	splitnamefrom_toappendzipr%   )r+   r,   r-   r/   r0   name2lenname2filenamesabsolute_instructionsr'   r&   	abs_instrsplit_length	filenamesr8   rE   rF   r>   index_start	index_endshard_lengthr?   r@   r=   s   `  ``                 @r    make_file_instructionsrR   \   sS   $ FEEEEHPPKPPP	 	 	 	 	 	 	  	 	 	N k?33 =%//<<'33H== L* , ,		 34"9#67	*9+>?_,)/&\1\\y| % d dU
*!((hWY\aWa)b)bccccd KI*-i*G*G 	, 	,&,\)	9$$k)9)927+2E2E5;..1D689nn2+d22"Dqyy %,,(DZ^-_-_``` 42::L4$7$74OL|+	, !+   r   c                       e Zd ZdZdeded         fdZddefdZddefd	Z	d
 Z
	 ddZ	 	 ddee         ded         fdZdefdZdS )
BaseReaderz@
    Build a Dataset object out of Instruction instance(s).
    r:   r5   r   c                 0    || _         || _        d| _        dS )zInitializes ArrowReader.

        Args:
            path (str): path where tfrecords are stored.
            info (DatasetInfo): info about the dataset.
        N)_path_info_filetype_suffix)selfr:   r5   s      r    __init__zBaseReader.__init__   s      
.2
/3r   Freturnc                     t           )=Returns a Dataset instance from given (filename, skip, take).)NotImplementedError)rY   filename_skip_take	in_memorys      r    _get_table_from_filenamez#BaseReader._get_table_from_filename   s    !!r   c                    t          |          dk    st          d |D                       st          d          g }t          j        |          }|D ]0}t
          j                            | j        |d                   |d<   1|D ].}| 	                    ||          }|
                    |           /d |D             }|s"| j        | j        j        t          d          |p7t          j        g t          j        | j        j        j                  	          g}t          |          d
k    rt%          |          n|d         }|S )a  Returns Dataset for given file instructions.

        Args:
            files: List[dict(filename, skip, take)], the files information.
                The filenames contain the absolute path, not relative.
                skip/take indicates which example read in the file: `ds.slice(skip, take)`
            in_memory (bool, default False): Whether to copy the data in-memory.
        r   c              3   @   K   | ]}t          |t                    V  d S N)rA   r*   )r4   fs     r    	<genexpr>z)BaseReader._read_files.<locals>.<genexpr>   s,      %I%IajD&9&9%I%I%I%I%I%Ir   z&please provide valid file informationsr>   r`   c                 8    g | ]}t          |          d k    |S )r   )len)r4   ts     r    
<listcomp>z*BaseReader._read_files.<locals>.<listcomp>   s#    8881SVVaZZQZZZr   NzqTried to read an empty table. Please specify at least info.features to create an empty table with the right type.)schemar	   )ri   all
ValueErrorcopydeepcopyosr:   joinrV   ra   rG   rW   featuresr   from_batchesparl   typer   )rY   filesr`   	pa_tablesre   f_dictpa_tables          r    _read_fileszBaseReader._read_files   sd    u::??#%I%I5%I%I%I"I"I?EFFF	e$$ 	D 	DAGLLQz]CCAjMM 	' 	'F";;Fi;XXHX&&&&88	888	 	dj0DJ4G4O D   m-"<R	RVR\ReRjHkHk"l"l"l!m	/29~~/B/B=+++	RSr   c                 P    t          |||| j        | j                  }|j        }|S )z?Return list of dict {'filename': str, 'skip': int, 'take': int})r/   r0   )rR   rX   rV   r'   )rY   r+   r-   r,   r'   rw   s         r    get_file_instructionsz BaseReader.get_file_instructions   s8    2+{D<Q_c_i
 
 
 "3r   c                     |                      |||          }|sd| d}t          |          |                     |||          S )a  Returns Dataset instance(s).

        Args:
            name (str): name of the dataset.
            instructions (ReadInstruction): instructions to read.
                Instruction can be string and will then be passed to the Instruction
                constructor as it.
            split_infos (list of SplitInfo proto): the available splits for dataset.
            in_memory (bool, default False): Whether to copy the data in-memory.

        Returns:
             kwargs to build a single Dataset instance.
        zInstruction "z" corresponds to no data!)rw   original_instructionsr`   )r}   rn   
read_files)rY   r+   instructionsr,   r`   rw   msgs          r    readzBaseReader.read   sX    * **4{KK 	"I,IIICS//!U,Zcdddr   Nrw   r   )Nr.   r   c                     |                      ||          }|ddlm}  |t          |                    }nd}|| j        |d}|S )aJ  Returns single Dataset instance for the set of file instructions.

        Args:
            files: List[dict(filename, skip, take)], the files information.
                The filenames contains the relative path, not absolute.
                skip/take indicates which example read in the file: `ds.skip().take()`
            original_instructions: store the original instructions used to build the dataset split in the dataset.
            in_memory (bool, default False): Whether to copy the data in-memory.

        Returns:
            kwargs to build a Dataset instance.
        rg   Nr	   )r   )arrow_tabler5   r<   )r{   splitsr   strrW   )rY   rw   r   r`   rz   r   r<   dataset_kwargss           r    r   zBaseReader.read_files   sk    & ##EY#?? ,%%%%%%E#34455EEE)14:PUVVr   download_configc                 \   t           dz   |                    t          j        d          z   }	 t          j                            |d          }t          |                    t          j        d                    }t          j        |t          j                            | j	        d                     | j
        7| j
                            | j
                            | j	                             n"# t          $ r}t          |          dd}~ww xY w	 | j
        j        D ]}|                     | j
        j        || j
        j                                                  }|D ]}	t'          t)          |	d                                       | j	                            }
t          j                            ||
          }t          |                    t          j        d          |          }t          j        ||	d                    dS # t          $ r}t-          |          dd}~ww xY w)a%  
        Download the dataset files from the Hf GCS

        Args:
            dl_cache_dir: `str`, the local cache directory used to download files
            relative_data_dir: `str`, the relative directory of the remote files from
                the `datasets` directory on GCS.

        /zdataset_info.jsonN)r+   r-   r,   r>   )r   )HF_GCP_BASE_URLreplacerq   sepr:   rr   r   shutilmoverV   rW   updatefrom_directoryFileNotFoundErrorr   r   r}   builder_namevaluesr   r   relative_tor"   )rY   r   relative_data_dirremote_cache_dirremote_dataset_infodownloaded_dataset_infoerrr<   r'   file_instructionfile_to_downloadremote_prepared_filenamedownloaded_prepared_filenames                r    download_from_hf_gcszBaseReader.download_from_hf_gcs  s*    +S03D3L3LRVUX3Y3YY	8"$',,/?AT"U"U&12E2M2MbfVY2Z2Z&[&[#K/djJ]1^1^___z%
!!$*";";DJ"G"GHHH  	8 	8 	8(--47	8	:* \ \$($>$>0 % $
 1 8 8 : : %? % %!
 ): \ \$'*40@0L+M+M+Y+YZ^Zd+e+e'f'f$/1w||<LN^/_/_,3>088EEWf4 4 40 K <>Nz>Z[[[[\\ \ ! 	: 	: 	:*3//T9	:s1   CC1 1
D;DDC6H 
H+H&&H+F)NF)r   r   r   r   r   r   rZ   r   ra   r{   r}   r   r   r*   r   r   r
   r   r   r   r    rT   rT      s        	4S 	4(? 	4 	4 	4 	4" "u " " " " U    6   e e e e< JN	 Dz  %%EF   <":N ": ": ": ": ": ":r   rT   c                   d     e Zd ZdZdeded         f fdZd
defdZe	d
defd	            Z
 xZS )ArrowReaderz
    Build a Dataset object out of Instruction instance(s).
    This Reader uses either memory mapping or file descriptors (in-memory) on arrow files.
    r:   r5   r   c                 Z    t                                          ||           d| _        dS )zInitializes ArrowReader.

        Args:
            path (str): path where Arrow files are stored.
            info (DatasetInfo): info about the dataset.
        arrowNsuperrZ   rX   rY   r:   r5   	__class__s      r    rZ   zArrowReader.__init__:  s-     	t$$$ 'r   Fr[   c                    |d         d|v r|d         ndd|v r|d         nd}}}t                               ||          }|dk    rt          |          |z
  }|1|/|dk    r|t          |          k    s|                    ||          }|S )r]   r>   r?   Nr@   rg   r   r   )r   
read_tableri   slice)rY   r_   r`   r>   r?   r@   tables          r    ra   z$ArrowReader._get_table_from_filenameD  s     z**04F*F*Fv&&D*04F*F*Fv&&D $
 &&x9&EE2::u::$D 0$!))PSTYPZPZHZHZKKd++Er   c                 L    |rt           nt          }|                    |           S )z
        Read table from file.

        Args:
            filename (str): File name of the table.
            in_memory (bool, default=False): Whether to copy the data in-memory.

        Returns:
            pyarrow.Table
        )r   r   	from_file)r>   r`   	table_clss      r    r   zArrowReader.read_tableS  s'     &/EMM4E	""8,,,r   r   )r   r   r   r   r   r   rZ   r   ra   staticmethodr   __classcell__r   s   @r    r   r   4  s         
(S ((? ( ( ( ( ( ( u     - - - - - \- - - - -r   r   c                   >     e Zd ZdZdeded         f fdZd Z xZS )ParquetReaderzv
    Build a Dataset object out of Instruction instance(s).
    This Reader uses memory mapping on parquet files.
    r:   r5   r   c                 Z    t                                          ||           d| _        dS )zInitializes ParquetReader.

        Args:
            path (str): path where tfrecords are stored.
            info (DatasetInfo): info about the dataset.
        parquetNr   r   s      r    rZ   zParquetReader.__init__i  s-     	t$$$ )r   c                     |d         d|v r|d         ndd|v r|d         nd}}}t          j        |d          }|1|/|dk    r|t          |          k    s|                    ||          }|S )r]   r>   r?   Nr@   T)
memory_mapr   )pqr   ri   r   )rY   r_   kwargsr>   r?   r@   rz   s          r    ra   z&ParquetReader._get_table_from_filenames  s     z**04F*F*Fv&&D*04F*F*Fv&&D $ =d;;; 0$!))PST\P]P]H]H]~~dD11Hr   )	r   r   r   r   r   r   rZ   ra   r   r   s   @r    r   r   c  si         
*S *(? * * * * * *      r   r   c                   2    e Zd ZU dZeed<   eed<   eed<   dS )_AbsoluteInstructionz?A machine friendly slice: defined absolute positive boundaries.rD   rE   rF   N)r   r   r   r   r   r)   r(   r   r   r    r   r     s1         IINNNJJJGGGGGr   r   c                       e Zd ZU dZeed<   dZee         ed<   dZ	ee         ed<   dZ
ee         ed<   dZee         ed<   d ZdS )	_RelativeInstructionzHRepresents a single parsed slicing instruction, can use % and negatives.rD   NrE   rF   unitroundingc                    | j         | j         dvrt          d          | j        | j        dvrt          d          | j         dk    r| j        t          d          | j         dk    r.| j        't	          | j                  dk    rt          d          | j         dk    r.| j        't	          | j                  dk    rt          d          | j        | j         dk    rd	n| j        | j        d
<   d S )N)%abszunit must be either % or abs)closestpct1_dropremainderz5rounding must be either closest or pct1_dropremainderr   zAIt is forbidden to specify rounding if not using percent slicing.d   z2Percent slice boundaries must be > -100 and < 100.r   r   )r   rn   r   rE   r   rF   __dict__rY   s    r    __post_init__z"_RelativeInstruction.__post_init__  s   9 TYl%B%B;<<<=$>_)_)_TUUU9 9`aaa9
 63tz??S;P;PQRRR9 3DGs8J8JQRRR151F49X[K[K[IIaeanj!!!r   )r   r   r   r   r   r)   rE   r   r(   rF   r   r   r   r   r   r    r   r     s         RRNNNE8C=BD(3-"Hhsm"""o o o o or   r   c           
      
   t                               |           }|st          d|            |                    d          s|                    d          rdnd}t	          |                    d          |                    d          |                    d          r"t          |                    d                    nd	|                    d
          r"t          |                    d
                    nd	|          S )z)Returns ReadInstruction for given string.z!Unrecognized instruction format: from_pctto_pctr   r   r<   r   fromNrF   )
split_namer   rE   rF   r   )_SUB_SPEC_REmatchrn   groupr.   r(   )specresr   s      r    _str_to_read_instructionr     s    


T
"
"C ECTCCDDD))J''I399X+>+>I33ED99W%%:&&(+		&(9(9Cc#))F##$$$t#&99T??<3syy   r   c                 d    |dk     rd}t          |          | t          j        |dz            z  S )Nr   zUsing "pct1_dropremainder" rounding on a split with less than 100 elements is forbidden: it always results in an empty dataset.      Y@)rn   mathtrunc)boundaryr&   r   s      r    _pct_to_abs_pct1r     s@    cL 	 oodj!56666r   c                 F    t          t          | |z  dz                      S )Nr   )r(   round)r   r&   s     r    _pct_to_abs_closestr     s"    uX,u455666r   c                 (   | j         dk    rt          nt          }| j        }||vr#t	          d| dt          |           d          ||         }| j        }| j        }| j        dk    r!|dn |||          }||n |||          }n|dn|}||n|}t          |          |k    st          |          |k    rd|pd	 d
|pd	 d| d}t	          |          |dk     r||z   }n|dk    rd}|dk     r||z   }n||k    rd}t          |||          S )zReturns _AbsoluteInstruction instance for given RelativeInstruction.

    Args:
        rel_instr: RelativeInstruction instance.
        name2len: dict {split_name: num_examples}.
    r   zUnknown split "z". Should be one of .r   Nr   zRequested slice [ :z] incompatible with z
 examples.)r   r   r   rD   rn   listrE   rF   r   r   r   )	rel_instrrI   
pct_to_absr<   r&   rE   rF   r   s           r    _rel_to_abs_instrr     sy    )2(:i(G(G$$M]JEHW5WWd8nnWWWXXXE?LOE	B~]

5,(G(GZ\\ZZL-I-I]Z\\R
5zzL  CGGl$:$:f%+2ffbfflfffooqyyu$	!	AvvB	|		ub111r   c                   j    e Zd ZdZd Zed             ZddZed             Zd Z	d Z
d	 Zd
 Zd ZdS )r.   a  Reading instruction for a dataset.

    Examples::

      # The following lines are equivalent:
      ds = datasets.load_dataset('mnist', split='test[:33%]')
      ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction.from_spec('test[:33%]'))
      ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction('test', to=33, unit='%'))
      ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction(
          'test', from_=0, to=33, unit='%'))

      # The following lines are equivalent:
      ds = datasets.load_dataset('mnist', split='test[:33%]+train[1:-1]')
      ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction.from_spec(
          'test[:33%]+train[1:-1]'))
      ds = datasets.load_dataset('mnist', split=(
          datasets.ReadInstruction('test', to=33, unit='%') +
          datasets.ReadInstruction('train', from_=1, to=-1, unit='abs')))

      # The following lines are equivalent:
      ds = datasets.load_dataset('mnist', split='test[:33%](pct1_dropremainder)')
      ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction.from_spec(
          'test[:33%](pct1_dropremainder)'))
      ds = datasets.load_dataset('mnist', split=datasets.ReadInstruction(
          'test', from_=0, to=33, unit='%', rounding="pct1_dropremainder"))

      # 10-fold validation:
      tests = datasets.load_dataset(
          'mnist',
          [datasets.ReadInstruction('train', from_=k, to=k+10, unit='%')
          for k in range(0, 100, 10)])
      trains = datasets.load_dataset(
          'mnist',
          [datasets.ReadInstruction('train', to=k, unit='%') + datasets.ReadInstruction('train', from_=k+10, unit='%')
          for k in range(0, 100, 10)])

    c                     || _         d S rd   _relative_instructions)rY   relative_instructionss     r    _initzReadInstruction._init  s    &;###r   c                 Z    |                      |           }|                    |           |S )zCReturns ReadInstruction obj initialized with relative_instructions.)__new__r   )clsr   results      r    ,_read_instruction_from_relative_instructionsz<ReadInstruction._read_instruction_from_relative_instructions  s-     S!!*+++r   Nc           	      T    |                      t          |||||          g           dS )a  Initialize ReadInstruction.

        Args:
            split_name (str): name of the split to read. Eg: 'train'.
            rounding (str, optional): The rounding behaviour to use when percent slicing is
                used. Ignored when slicing with absolute indices.
                Possible values:
                 - 'closest' (default): The specified percentages are rounded to the
                     closest value. Use this if you want specified percents to be as
                     much exact as possible.
                 - 'pct1_dropremainder': the specified percentages are treated as
                     multiple of 1%. Use this option if you want consistency. Eg:
                         len(5%) == 5 * len(1%).
                     Using this option, one might not be able to use the full set of
                     examples, if the number of those is not a multiple of 100.
            from_ (int):
            to (int): alternative way of specifying slicing boundaries. If any of
                {from_, to, unit} argument is used, slicing cannot be specified as
                string.
            unit (str): optional, one of:
                '%': to set the slicing unit as percents of the split size.
                'abs': to set the slicing unit as absolute numbers.
        N)r   r   )rY   r   r   rE   rF   r   s         r    rZ   zReadInstruction.__init__  s0    6 	

(UBhOOPQQQQQr   c                     t          |          }t                              |          }|st          d|           t	          |d                   }t          d |dd         D             |          S )aM  Creates a `ReadInstruction` instance out of a string spec.

        Args:
            spec (`str`):
                Split(s) + optional slice(s) to read + optional rounding
                if percents are used as the slicing unit. A slice can be specified,
                using absolute numbers (`int`) or percentages (`int`).

        Examples:

            ```
            test: test split.
            test + validation: test split + validation split.
            test[10:]: test split, minus its first 10 records.
            test[:10%]: first 10% records of test split.
            test[:20%](pct1_dropremainder): first 10% records, rounded with the pct1_dropremainder rounding.
            test[:-5%]+train[40%:60%]: first 95% of test + middle 20% of train.
            ```

        Returns:
            ReadInstruction instance.
        z&No instructions could be built out of r   c              3   4   K   | ]}t          |          V  d S rd   )r   )r4   subs     r    rf   z,ReadInstruction.from_spec.<locals>.<genexpr>Q  s+      FFc,S11FFFFFFr   r	   N)r   _ADDITION_SEP_REr<   rn   r   sum)r   r   subsr-   s       r    rB   zReadInstruction.from_spec4  sz    0 4yy%%d++ 	NLdLLMMM.tAw77FFT!""XFFFTTTr   c                    g }| j         D ]}|j        }|j        |j        y|j        }|j        }|j        }|j        }|dk    r|nd}|t          |          |z   nd}|t          |          |z   nd}d| d| d}|dk    r||dk    rd| dnd}	|||	z   z  }|                    |           d	                    |          S )
Nr   r   [r   ]r   ()+)	r   rD   rE   rF   r   r   r   rG   rr   )
rY   rel_instr_specsr   rel_instr_specrE   rF   r   r   	slice_strrounding_strs
             r    to_speczReadInstruction.to_specS  s   4 	3 	3I&0N*il.F!\ ~$-#s{{tt-2->E

T))B')~SWWt^^2------	'+s{{x7KPX\ePePeOOOOOkm  )l"::"">2222xx(((r   c                 >   t          |t                    sd}t          |          | j        }|j        }|d         j        dk    rA|d         j        dk    r0| j        d         j        |d         j        k    rt          d          |                     ||z             S )zEReturns a new ReadInstruction obj, result of appending other to self.zAReadInstruction can only be added to another ReadInstruction obj.r   r   zPIt is forbidden to sum ReadInstruction instances with different rounding values.)rA   r.   	TypeErrorr   r   r   rn   r   )rY   otherr   self_ris	other_riss        r    __add__zReadInstruction.__add__g  s    %11 	!UCC.. .0	QK%%!!U**+A.79Q<;PPPoppp@@IAUVVVr   c                 *    |                                  S rd   )r  r   s    r    __str__zReadInstruction.__str__v  s    ||~~r   c                     d| j          dS )NzReadInstruction(r   r   r   s    r    __repr__zReadInstruction.__repr__y  s    @$"=@@@@r   c                 *    fd| j         D             S )aZ  Translate instruction into a list of absolute instructions.

        Those absolute instructions are then to be added together.

        Args:
            name2len (`dict`):
                Associating split names to number of examples.

        Returns:
            list of _AbsoluteInstruction instances (corresponds to the + in spec).
        c                 0    g | ]}t          |          S r   )r   )r4   r   rI   s     r    rk   z/ReadInstruction.to_absolute.<locals>.<listcomp>  s$    ddd9!)X66dddr   r   )rY   rI   s    `r    rC   zReadInstruction.to_absolute|  s#     edddHcddddr   )NNNN)r   r   r   r   r   classmethodr   rZ   rB   r  r	  r  r  rC   r   r   r    r.   r.     s        $ $L< < <   [R R R R: U U [U<) ) )(W W W  A A Ae e e e er   )NN)?r   ro   r   rq   rer   dataclassesr   pathlibr   typingr   r   r   r   pyarrowru   pyarrow.parquetr   r   download.download_configr
   namingr   r   r   r   r   r   r   utilsr   utils.file_utilsr   r5   r   r   r   r   
get_loggerr   loggerr   compileXr   r   ConnectionErrorr   r"   r%   r   rR   rT   r   r   r   r   r   r   r   r   r.   r   r   r    <module>r      sI        				 				  ! ! ! ! ! !       7 7 7 7 7 7 7 7 7 7 7 7           4 4 4 4 4 4 : : : : : : : : I I I I I I I I I I I I       ) ) ) ) ) )  )!!!!!!(((((((( 
	H	%	%QrzadO   D   2:k** 	 	 	 	 	_ 	 	 		 	 	 	 	 	 	 	 $" " " " " " " "$ &*!%@ @
@k"@ s--.@ c]	@
 #@ @ @ @FR: R: R: R: R: R: R: R:j,- ,- ,- ,- ,-* ,- ,- ,-^    J   > $        $o o o o o o o o0  7 7 77 7 72 2 2Dde de de de de de de de de der   