
    +gd^                        d Z ddlZddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZ ddlmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ dd	l	mZmZ dd
lmZmZmZmZmZm Z m!Z!m"Z"m#Z# ddl$m%Z% ddl&Z&ddl'Z'ddl(Z'ddl)Z*ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl/m1Z1 	 ddl2Z3ddl2m4Z4m5Z5 n# e6$ r	 dxZ3xZ5Z4Y nw xY w e1j7        e8          Z9ej:        Z;d Z<de#e=e>f         de=fdZ?de>de>dee>e>f         fdZ@d ZAed             ZBedUde=fd            ZCd ZDd  ZEd! ZFd" ZG G d# d$eH          ZI G d% d&eJ          ZKd' ZL	 	 	 	 	 	 	 	 	 dVd)eegef         d*ed+eMd,eMd-eMd.eMd/e e=         d0e=d1e eN         d2eMd3e e>         defd4ZO G d5 d6          ZPdWd8ZQd9e>de!e>e e>         f         fd:ZRd;e>de!e>e>e>e>f         fd<ZS G d= d>e&jT                  ZTd? ZUed@             ZVdA ZWdB ZXe0jY         e,jZ        dC          k     r eXe          dD             Z[n?e0jY        j\        ddE          e,jZ        dC          j\        k    r eXe          dF             Z]e0jY         e,jZ        dG          k     r eXe          dH             Z^ne0jY        j\        ddE          e,jZ        dG          j\        k    r eXe          dI             Z^n?e0jY        j\        ddE          e,jZ        dC          j\        k    r eXe          dJ             Z^dK Z_ e"dL          Z`dMeja        dNedOee`         f         dPeHde=fdQZbdRe#ejc        j        e'jc        j        f         dNedOee`         f         dSeeH         dee`         fdTZddS )Xz)Some python utils function and classes.

    N)contextmanager)fieldsis_dataclass)BytesIO)ManagerPoolRLock)Empty)
disk_usage)CodeTypeFunctionType)	AnyCallableDictIterableListOptionalTupleTypeVarUnion)urlparse)version)tqdm   )config   )logging)FinalLiteralc                     | sdS g d}t          |           } |D ]\  }}| |z  }|dk    r
|dd| c S t          |            dS )a6  Returns a human readable size string.

    If size_in_bytes is None, then returns "Unknown size".

    For example `size_str(1.5 * datasets.units.GiB) == "1.50 GiB"`.

    Args:
        size_in_bytes: `int` or `None`, the size, in bytes, that we want to
            format as a human-readable size string.
    zUnknown size))PiB           )TiB        )GiB   @)MiB   )KiB   g      ?z.2f z bytes)floatint)size_in_bytes
_NAME_LISTname
size_bytesvalues        7lib/python3.11/site-packages/datasets/utils/py_utils.pysize_strr4   C   s      ~aaaJ-((M& ) )j
*C<<((($((((( -  ((((    sizereturnc                    t          | t                    r| S |                                                     d          rt          | dd                   dz  S |                                                     d          rt          | dd                   dz  S |                                                     d          rt          | dd                   dz  S |                                                     d	          rt          | dd                   d
z  S |                                                     d          rt          | dd                   dz  S |                                                     d          r6t          | dd                   dz  }|                     d          r|dz  n|S |                                                     d          r6t          | dd                   dz  }|                     d          r|dz  n|S |                                                     d          r6t          | dd                   dz  }|                     d          r|dz  n|S |                                                     d          r6t          | dd                   dz  }|                     d          r|dz  n|S |                                                     d          r6t          | dd                   dz  }|                     d          r|dz  n|S t	          d|  d          )a)  
    Converts a size expressed as a string with digits an unit (like `"50MB"`) to an integer (in bytes).

    Args:
        size (`int` or `str`): The size to convert. Will be directly returned if an `int`.

    Example:

    ```py
    >>> convert_file_size_to_int("1MiB")
    1048576
    ```
    PIBNr"   TIBr$   GIBr&   MIBr(   KIBr*   PBl     I5 b   TBl    J)GBi ʚ;MBi@B KBi  z`size=zM` is not in a valid format. Use an integer followed by the unit, e.g., '5GB'.)
isinstancer-   upperendswith
ValueError)r6   int_sizes     r3   convert_file_size_to_intrL   [   s    $ zz||U## (49~~''zz||U## (49~~''zz||U## (49~~''zz||U## (49~~''zz||U## (49~~''zz||T"" AtCRCy>>V, $c 2 2@x1}}@zz||T"" AtCRCy>>V, $c 2 2@x1}}@zz||T"" AtCRCy>>U+ $c 2 2@x1}}@zz||T"" AtCRCy>>U+ $c 2 2@x1}}@zz||T"" AtCRCy>>U+ $c 2 2@x1}}@
qdqqq
r
rrr5   stringpatternc                 0   t          j        dd|          }t          j        ||           }|t          d|  d|           t	          |                                          }t          j        d|          }t          t          ||                    }|S )a  Un-format a string using a python f-string pattern.
    From https://stackoverflow.com/a/36838374

    Example::

        >>> p = 'hello, my name is {name} and I am a {age} year old {what}'
        >>> s = p.format(name='cody', age=18, what='quarterback')
        >>> s
        'hello, my name is cody and I am a 18 year old quarterback'
        >>> string_to_dict(s, p)
        {'age': '18', 'name': 'cody', 'what': 'quarterback'}

    Args:
        string (str): input string
        pattern (str): pattern formatted like a python f-string

    Returns:
        Dict[str, str]: dictionary of variable -> value, retrieved from the input using the pattern

    Raises:
        ValueError: if the string doesn't match the pattern
    z{(.+?)}z(?P<_\1>.+)NzString z doesn't match the pattern )	resubsearchrJ   listgroupsfindalldictzip)rM   rN   regexresultvalueskeys_dicts          r3   string_to_dictr]      s    . F:~w77EYuf%%F~O6OOgOOPPP&--//""F:j'**DT6""##ELr5   c                     d fdt          | t                    s |           st          |  d           |           S )zbConvert an object to its dictionary representation recursively.

    <Added version="2.4.0"/>
    c                 L    t          |           ot          | t                     S N)r   rG   typeobjs    r3   _is_dataclass_instancez&asdict.<locals>._is_dataclass_instance   s"    C  >C)>)>%>>r5   c                     |           rki }t          |           D ]W} t          | |j                            }|j        r&||j        k    s|j                            dd          r
|||j        <   X|S t          | t                    r.t          | d          r t          |           fd| D              S t          | t          t          f          r$ t          |           fd| D                       S t          | t                    r fd|                                 D             S t          j        |           S )N$include_in_asdict_even_if_is_defaultF_fieldsc                 &    g | ]} |          S  ri   .0v_asdict_inners     r3   
<listcomp>z1asdict.<locals>._asdict_inner.<locals>.<listcomp>   s#    ===A}}Q//===r5   c              3   .   K   | ]} |          V  d S r`   ri   rj   s     r3   	<genexpr>z0asdict.<locals>._asdict_inner.<locals>.<genexpr>   s-      ;;!]]1--;;;;;;r5   c                 @    i | ]\  }} |           |          S ri   ri   )rk   krl   rm   s      r3   
<dictcomp>z1asdict.<locals>._asdict_inner.<locals>.<dictcomp>   s3    OOO41aMM!$$mmA&6&6OOOr5   )r   getattrr0   initdefaultmetadatagetrG   tuplehasattrra   rS   rV   itemscopydeepcopy)rc   rY   fr2   rm   rd   s       r3   rm   zasdict.<locals>._asdict_inner   s[   !!#&& 	&FC[[ + +%gc16&:&:;;v +!)!3!3qz~~Flns7t7t!3%*F16NMU## 	&Y(?(? 	&499=======>>dE]++ 	& 499;;;;s;;;;;;T"" 	&OOOO399;;OOOO=%%%r5   z is not a dict or a dataclass)rG   rV   	TypeError)rc   rm   rd   s    @@r3   asdictr      s    ? ? ?& & & & & &* c4   ?)?)?)D)D ?3===>>>=r5   c              #      K   t          | |d          }t          | ||           	 dV  t          | ||           dS # t          | ||           w xY w)z%Temporarily assign obj.attr to value.N)rt   setattr)rc   attrr2   originals       r3   temporary_assignmentr      si       sD$''HCu%T8$$$$$T8$$$$s	   = AFseedc              #     K   t           j                                        }t           j                            |            |rt          j        rddl}|j                                        }|j                            |            |j	        
                                r3|j	                                        }|j	                            |            |rt          j        rddl}ddlm} |j                                        }	|j        j                            |           }
|j                            |
           |                                st-          d          |                                }|j        }t1          |d          }|r|j        }|                    |            	 dV  t           j                            |           |rYt          j        rM|j                            |           |j	        
                                r|j	                            |           |rJt          j        r@|j                            |	           ||_        |r	||_        dS t=          |d           dS dS dS # t           j                            |           |rYt          j        rM|j                            |           |j	        
                                r|j	                            |           |rHt          j        r=|j                            |	           ||_        |r||_        w t=          |d           w w w xY w)zUTemporarily set the random seed. This works for python numpy, pytorch and tensorflow.r   N)contextzBSetting random seed for TensorFlow is only available in eager mode_rng)nprandom	get_stater   r   TORCH_AVAILABLEtorchget_rng_statemanual_seedcudais_availableget_rng_state_allmanual_seed_allTF_AVAILABLE
tensorflowtensorflow.pythonr   get_global_generator	Generator	from_seedset_global_generatorexecuting_eagerlyrJ   _seedrz   r   _set_global_seed	set_stateset_rng_stateset_rng_state_alldelattr)r   set_pytorchset_tensorflownp_stater   torch_statetorch_cuda_statestftfpycontexttf_statetemp_gen
tf_contexttf_seedtf_rng_initializedtf_rngs                  r3   	temp_seedr      sO      y""$$HINN4 -v- -l0022  &&&:""$$ 	- %
 < < > >J&&t,,, *&- *<<<<<<911339&0066
	&&x000##%% 	cabbb ((**
"$Z88 	%_F##D))),
	H%%% 	@61 	@L&&{333z&&(( @
,,->??? 	,f1 	,I**8444&J! ,"(

F+++++	, 	, 	, 	, 		H%%% 	@61 	@L&&{333z&&(( @
,,->??? 	,f1 	,I**8444&J! ,"(

F++++	, 	,s   (I6 6CL=c              #   j   K   t                      }| D ]}||vr|                    |           |V   dS )z=Iterate over iterable and return only unique values in order.N)setadd)rZ   seenr2   s      r3   unique_valuesr     sL      55D  HHUOOOKKK r5   c                       fd}|S )z4If the value is None, return None, else call `func`.c                 "    |  |           nd S r`   ri   )r2   funcs    r3   wrapperz'no_op_if_value_is_null.<locals>.wrapper  s    #/ttE{{{T9r5   ri   )r   r   s   ` r3   no_op_if_value_is_nullr     s#    : : : : : Nr5   c                 @    t          |           D ]\  }}|||fc S dS )zwReturn the index and the value of the first non-null value in the iterable. If all values are None, return -1 as index.N)N)	enumerate)iterableir2   s      r3   first_non_null_valuer   "  s:    h''  5e8OOO 8r5   c               '      K   t          t          j        |            D ]!t          fd| D                       fV  "dS )z9Iterate over items of dictionaries grouped by their keys.c              3   (   K   | ]}|         V  d S r`   ri   )rk   dkeys     r3   rp   zzip_dict.<locals>.<genexpr>.  s'      //A3//////r5   N)r   	itertoolschainry   )dictsr   s    @r3   zip_dictr   *  s`      Y_e455 0 05//////////////0 0r5   c                   6     e Zd ZdZ fdZ fdZ fdZ xZS )NonMutableDictzDict where keys can only be added but not modified.

    Will raise an error if the user try to overwrite one key. The error message
    can be customized during construction. It will be formatted using {key} for
    the overwritten key.
    c                     |                     dd          | _        |rt          d           t                      j        |i | d S )N	error_msgz$Try to overwrite existing key: {key}z1NonMutableDict cannot be initialized with kwargs.)pop
_error_msgrJ   super__init__)selfargskwargs	__class__s      r3   r   zNonMutableDict.__init__9  sZ     **2
 
  	RPQQQ$)&)))))r5   c                     || v r(t          | j                            |                    t                                          ||          S )Nr   )rJ   r   formatr   __setitem__)r   r   r2   r   s      r3   r   zNonMutableDict.__setitem__B  sG    $;;T_333<<===ww""3...r5   c                     t           fd|D                       rEt           j                            t	                     t	          |          z                      t                                          |          S )Nc              3       K   | ]}|v V  	d S r`   ri   )rk   rr   r   s     r3   rp   z(NonMutableDict.update.<locals>.<genexpr>H  s'      ((QqDy((((((r5   r   )anyrJ   r   r   r   r   update)r   otherr   s   ` r3   r   zNonMutableDict.updateG  sr    ((((%((((( 	QT_33D		CJJ8N3OOPPPww~~e$$$r5   )__name__
__module____qualname____doc__r   r   r   __classcell__)r   s   @r3   r   r   1  st         * * * * */ / / / /
% % % % % % % % %r5   r   c                       e Zd ZdZddZdS )classpropertyz5Descriptor to be used as decorator for @classmethods.Nc                 H     | j                             d |                      S r`   )fget__get__)r   rc   objtypes      r3   r   zclassproperty.__get__P  s"    /ty  w//111r5   r`   )r   r   r   r   r   ri   r5   r3   r   r   M  s.        ??2 2 2 2 2 2r5   r   c                   	
 | \  	}
}}}t          |t                    st          |
          s 	|          S |4t          j                    t          j        k     rt          j                     |7|s5t          d t          j        D                       rt          ddd           t          |t                    r|
                                n|}|||dz   nddz   t          |          z   n|}t          j        |||d|	          5 }t          |t                    r	
fd
|D             cddd           S 	
fd|D             }t          |t                    r|cddd           S t          |t                    rt          |          cddd           S t          j        |          cddd           S # 1 swxY w Y   dS )zEApply a function recursively to each element of a nested data struct.Nc              3   (   K   | ]}d |j         v V  dS )notebookNr   )rk   tqdm_clss     r3   rp   z%_single_map_nested.<locals>.<genexpr>a  s+      4r4rYaZ8CT5T4r4r4r4r4r4rr5   r+    T)endflush#rc   )disablepositionunitdescc                 B    i | ]\  }}|t          |d dd f          S NT_single_map_nested)rk   rr   rl   functiontypess      r3   rs   z&_single_map_nested.<locals>.<dictcomp>i  s8    cccVZVWYZA)8QtT4*PQQcccr5   c           
      :    g | ]}t          |d dd f          S r   r   )rk   rl   r   r   s     r3   rn   z&_single_map_nested.<locals>.<listcomp>k  s0    ___UV((AudD$)OPP___r5   )rG   rV   r   get_verbosityWARNINGset_verbosity_warningr   r   __mro__printr{   strrS   ry   r   array)r   data_structrankdisable_tqdmr   pbar_iterable	pbar_descpbarmappedr   r   s            @@r3   r   r   T  s   =A:Hk5$d k4(( %K1O1O %x$$$ G133goEE%''' 4r4reieq4r4r4r1r1rcr&&&& ,6k4+H+HYK%%'''kMNRN^t/R3>TJJdhI	m\Du[d	e	e	e 
(imk4(( 		(ccccc^bccc
( 
( 
( 
( 
( 
( 
( 
( `____Z^___F+t,, (
( 
( 
( 
( 
( 
( 
( 
( K// (V}}
( 
( 
( 
( 
( 
( 
( 
( x''
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
( 
(s$   $G %G2#G"GG	GTr   r  	dict_onlymap_list	map_tuple	map_numpynum_procparallel_min_lengthr   r  r   c           	      h    ]g |sJ|r j         t                     |r j         t                     |r j         t          j                   t                    t          |t                    st          |          s  |          S |	pt          j                     }	t          |t                    r!t          |	                                          n|}|d}|dk    st          |          |k     r& fdt          j        ||	|
          D             }n|t          |          k    r|nt          |          }g }t          |          D ]o}t          |          |z  }t          |          |z  }||z  t          ||          z   }||z   ||k     rdndz   }|                      |||         ||	|
f           pt          |          t          d |D                       k    r9t          dt          |           dt          d	 |D                                  t                               d
| dt          |           dd |D                         d\  }}|	st%                      ft          j        }}t)          |||          5 }|                    t,          |          }ddd           n# 1 swxY w Y   t                               d| d           d |D             }t                               dt          |           d           t          |t                    r/t          t/          |                                |                    S t          |t                    r|S t          |t                    rt          |          S t          j        |          S )a  Apply a function recursively to each element of a nested data struct.

    Use multiprocessing if num_proc > 1 and the length of data_struct is greater than or equal to
    `parallel_min_length`.

    <Changed version="2.5.0">

    Before version 2.5.0, multiprocessing was not used if `num_proc` was greater than or equal to ``len(iterable)``.

    Now, if `num_proc` is greater than or equal to ``len(iterable)``, `num_proc` is set to ``len(iterable)`` and
    multiprocessing is used.

    </Changed>

    Args:
        function (`Callable`): Function to be applied to `data_struct`.
        data_struct (`Any`): Data structure to apply `function` to.
        dict_only (`bool`, default `False`): Whether only apply `function` recursively to `dict` values in
            `data_struct`.
        map_list (`bool`, default `True`): Whether also apply `function` recursively to `list` elements (besides `dict`
            values).
        map_tuple (`bool`, default `False`): Whether also apply `function` recursively to `tuple` elements (besides
            `dict` values).
        map_numpy (`bool, default `False`): Whether also apply `function` recursively to `numpy.array` elements (besides
            `dict` values).
        num_proc (`int`, *optional*): Number of processes.
        parallel_min_length (`int`, default `2`): Minimum length of `data_struct` required for parallel
            processing.
            <Added version="2.5.0"/>
        types (`tuple`, *optional*): Additional types (besides `dict` values) to apply `function` recursively to their
            elements.
        disable_tqdm (`bool`, default `True`): Whether to disable the tqdm progressbar.
        desc (`str`, *optional*): Prefix for the tqdm progressbar.

    Returns:
        `Any`
    Nr   c           
      :    g | ]}t          |d dd f          S r   r   )rk   rc   r   r   s     r3   rn   zmap_nested.<locals>.<listcomp>  s>     
 
 
 #udD$GHH
 
 
r5   )r   r   r   c              3   @   K   | ]}t          |d                    V  dS r   Nlenrk   r   s     r3   rp   zmap_nested.<locals>.<genexpr>  s,      >>aAaD		>>>>>>r5   zHError dividing inputs iterable among processes. Total number of objects z
, length: c              3   @   K   | ]}t          |d                    V  dS r  r  r  s     r3   rp   zmap_nested.<locals>.<genexpr>  s,      ==Qs1Q4yy======r5   z	Spawning z processes for z objects in slices of c                 8    g | ]}t          |d                    S )r   r  r  s     r3   rn   zmap_nested.<locals>.<listcomp>  s)    VuVuVuefWZ[\]^[_W`W`VuVuVur5   )NN)initargsinitializerz	Finished z
 processesc                     g | ]	}|D ]}|
S ri   ri   )rk   proc_resrc   s      r3   rn   zmap_nested.<locals>.<listcomp>  s%    AAA(AA#AAAAr5   z	Unpacked z objects)appendrS   ry   r   ndarrayrG   rV   r   is_progress_bar_enabledrZ   r  r   rangeminsumrJ   loggerinfor	   set_lockr   mapr   rW   r[   r  )r   r  r  r  r  r  r  r  r   r  r   r   r
  
split_kwdsindexdivmodstartr   r  r  pools   `       `             r3   
map_nestedr.  t  sA   d } 	) #T""" $U### )RZ(((e k4(( %K1O1O %x$$$Hw'F'H'H#HL-7T-J-J[tK&&(()))P[H1}}H(;;;
 
 
 
 
|HlNNN
 
 

  (3x==8888c(mm
8__ 	a 	aEh--8+Ch--(*C%K#eS//1E#+eckkq9Cx%))<eULZ^_````x==C>>:>>>>>>>@+.x==@ @==*=====@ @   	wwwXwwVuVujtVuVuVuww	
 	
 	
 !++ 	>%*WWJkH(X;GGG 	>4XX0*==F	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	>4444555AAfAAA5F555666+t$$ $C((**F33444k4(( 	$MU++ 	$== 8F###s   KKKc                       e Zd ZddZddZdS )NestedDataStructureNc                     ||ng | _         d S r`   )datar   r2  s     r3   r   zNestedDataStructure.__init__  s     ,DD"			r5   c                     ||n j         }t          |t                    r4                     t	          |                                                    S t          |t          t          f          r fd|D             S |gS )Nc                 D    g | ]}                     |          D ]}|S ri   )flatten)rk   item	flattenedr   s      r3   rn   z/NestedDataStructure.flatten.<locals>.<listcomp>  s2    SSS$T@R@RSS9ISSSSr5   )r2  rG   rV   r6  rS   rZ   ry   r3  s   ` r3   r6  zNestedDataStructure.flatten  s~    'ttTYdD!! 	<<T[[]] 3 3444tUm,, 	SSSS$SSSS6Mr5   r`   )r   r   r   r   r6  ri   r5   r3   r0  r0    s<        5 5 5 5     r5   r0  .c                     	 t          t          j                            |                    j        }n# t
          $ r Y dS w xY w| |k     S r   )r   ospathabspathfreeOSError)needed_bytes	directory
free_bytess      r3   has_sufficient_disk_spacerC    sT    	 : :;;@

   tt*$$s   14 
AAurl_pathc                    t          |           }d}|j        dv r|j        dk    rd| v r?|                     d          st	          d|  d          |                     dd          } nZ|j        d	d         }d
|v r|                    d
          n|df\  }}|                    d          \  }}d| d| d| d} | d| }| |fS )zMConvert a link to a file on a github repo in a link to the raw github object.N)httphttpss3z
github.comblobz.pyzExternal import from github at z) should point to a file ending with '.py'rawr   z/tree/master/zhttps://github.com/z	/archive/z.zip-)r   schemenetlocrI   rJ   replacer<  split)rD  parsedsub_directorygithub_path	repo_infobranch
repo_owner	repo_names           r3   _convert_github_urlrY    s   hFM}///FM\4Q4QX$$U++ x !v8!v!v!vwww''66HH !+abb/K?G;?V?V 1 1( ; ; ;]hjr\sIv$-OOC$8$8!J	ZZZZ)ZZfZZZH(33633M]""r5   	file_pathc                   	 g }t          | d          5 }|                    |                                           ddd           n# 1 swxY w Y   t                              d|  d           g }d}|D ] }t          j        d|          }t          |          dk    r| }|r1t          j        d	|t
          j	        
          		$t          j        d|t
          j	        
          		x	
                    d          rt          	fd|D                       r	
                    d          rU	
                    d          }t          |          \  }}|                    d	
                    d          ||f           	
                    d          r?|                    d	
                    d          	
                    d          df           i	
                    d          rC	
                    d          }|                    d	
                    d          |df           |                    d	
                    d          	
                    d          df           |S )a  Find whether we should import or clone additional files for a given processing script.
        And list the import.

    We allow:
    - library dependencies,
    - local dependencies and
    - external dependencies whose url is specified with a comment starting from "# From:' followed by the raw url to a file, an archive or a github repository.
        external dependencies will be downloaded (and extracted if needed in the dataset folder).
        We also add an `__init__.py` to each sub-folder of a downloaded folder so the user can import from them in the script.

    Note that only direct import in the dataset processing script will be handled
    We don't recursively explore the additional import to download further files.

    Example::

        import tensorflow
        import .c4_utils
        import .clicr.dataset-code.build_json_dataset  # From: https://raw.githubusercontent.com/clips/clicr/master/dataset-code/build_json_dataset
    zutf-8)encodingNz	Checking z for additional imports.Fz[\s\S]*?"""[\s\S]*?r   z=^import\s+(\.?)([^\s\.]+)[^#\r\n]*(?:#\s+From:\s+)?([^\r\n]*))flagszQ^from\s+(\.?)([^\s\.]+)(?:[^\s]*)\s+import\s+[^#\r\n]*(?:#\s+From:\s+)?([^\r\n]*)c              3   V   K   | ]#}|d                               d          k    V  $dS )r   r   N)group)rk   impmatchs     r3   rp   zget_imports.<locals>.<genexpr>C  s6      ??3q6U[[^^+??????r5      externalr   internallibrary)openextend	readlinesr$  debugrP   rU   r  ra  	MULTILINEr_  r   rY  r  )
rZ  linesr~   importsis_in_docstringlinedocstr_start_matchrD  rS  ra  s
            @r3   get_importsrp    s   ( E	i'	*	*	* $aQ[[]]###$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ LL@Y@@@AAA9;GO )R )RZ(>EE!""a'' #21O 	 Y[_gigsttt=Hdl  E
 };;q>> 	R????w????? {{1~~ S ;;q>>*=h*G*G'-
EKKNNHmTUUUUQ S
EKKNNEKKNNDQRRR{{1~~ R ;;q>>	5;;q>>8TJKKKK	5;;q>>5;;q>>4PQQQQNs   (A		AAc                       e Zd ZdZej                            ej        j        	                                          ZddZ
d ZdS )PicklerzHSame Pickler as the one from dill, but improved for notebooks and shellsTc                   	
 t          |          }|t          j        vrt          j        t          j        d          k     rd n7t          j        j        d d         t          j        d          j        k    rd |j        |j	        fdk    r1	 dd l
	t          |          	fd            }n# t          $ r Y nw xY w|j        |j	        fdk    r1	 dd lt          |          fd	            }n# t          $ r Y nw xY w|j        |j	        fd
k    r1	 dd lt          |          fd            }ny# t          $ r Y nmw xY w|j                            d          rOt!          d |j        D                       r1	 dd l
t          |          
fd            }n# t          $ r Y nw xY wt&          j                            | ||           d S )N0.3.6c                 N    t           j        j                            |           d S r`   )dill_dilllogr%  picklermsgs     r3   dill_logzPickler.save.<locals>.dill_logd  s     JN'',,,,,r5   rb  c                 P    t           j        j                            | |           d S r`   )rv  rw  r$  tracery  s     r3   r|  zPickler.save.<locals>.dill_logi  s#    J%++GS99999r5   )_regexPatternr   c                      | d|            |j         |j        f}|                     j        ||            | d           d S )NzRe: rb   z# Re)rN   r]  save_reducecompile)rz  rc   r   r|  rX   s      r3   _save_regexz!Pickler.save.<locals>._save_regexp  sb     ,,,777KI   ++EM4S+III &111r5   )r   Tensorc                     fd} | d|            |                                                                                                 f}|                     |||            | d           d S )Nc                 .                         |           S r`   )
from_numpy)np_arrayr   s    r3   _create_tensorz:Pickler.save.<locals>._save_tensor.<locals>._create_tensor  s    #(#3#3H#=#==r5   zTo: rb   z# To)detachcpunumpyr  )rz  rc   r  r   r|  r   s       r3   _save_tensorz"Pickler.save.<locals>._save_tensor  s    > > > > > !,,,777 #

 0 0 2 2 8 8 : :<++NDc+JJJ &111r5   )ztiktoken.coreEncodingc                      | d|            |j         |j        |j        |j        f}|                     j        ||            | d           d S )NzEnc: rb   z# Enc)r0   _pat_str_mergeable_ranks_special_tokensr  r  )rz  rc   r   r|  tiktokens      r3   _save_encodingz$Pickler.save.<locals>._save_encoding  sh     -#--888 ##,8LcNab++H,=t+MMM '222r5   z
spacy.langc              3   :   K   | ]}|j         |j        fd k    V  dS ))zspacy.languageLanguageN)r   r   )rk   clss     r3   rp   zPickler.save.<locals>.<genexpr>  sJ       F FUX.2PPF F F F F Fr5   c                     fd} | d|            |j         |                                f}|                     |||            | d           d S )Nc                     j                             | d         d                   }|                    |           }|                    |          S )Nnlplang)utilget_lang_classfrom_config
from_bytes)r   
bytes_datalang_clsr  spacys       r3   _create_langz6Pickler.save.<locals>._save_lang.<locals>._create_lang  sF    ',z'@'@vAV'W'WH"*"6"6v">">C#&>>*#=#==r5   zSp: rb   z# Sp)r   to_bytesr  )rz  rc   r  r   r|  r  s       r3   
_save_langz Pickler.save.<locals>._save_lang  s|    > > > > >
 !,,,777 #
CLLNN;++L$C+HHH &111r5   )save_persistent_id)ra   rr  dispatchr   DILL_VERSIONr   parsereleaser   r   rX   pklregisterImportErrorr   r  
startswithr   r   r  rv  save)r   rc   r  obj_typer  r  r  r  r|  rX   r  r  r   s           @@@@@r3   r  zPickler.save^  s   997+++"W]7%;%;;;- - - - $,RaR0GM'4J4J4RRR: : : #X%67;PPP LLL **     +*  #   D%x'89=PPP LLL **	 	 	 	 	 +*	 	 #   D%x'89=ZZZ#OOO **     +*  #   D$//== # F F\d\lF F F C C  LLL **
 
 
 
 
 +*
 
 #   D 	$8JKKKKKsH   B4 4
C CC7 7
DDD: :
EEF# #
F0/F0c                 z    t          |          t          k    r"t          j                            | |           d S d S r`   )ra   r  rv  rr  memoize)r   rc   s     r3   r  zPickler.memoize  s9    99L  s+++++ r5   N)T)r   r   r   r   rv  rw  MetaCatchingDictrr  r  r|   r  r  ri   r5   r3   rr  rr  Y  sl        RRz**4<+@+E+E+G+GHHHUL UL UL ULn, , , , ,r5   rr  c                 N    t          |d                              |            dS )zpickle an object to a fileT)recurseN)rr  dumprc   files     r3   r  r    s(    D$$$S)))
Fr5   c              #   2  K   	 dd t          |           j        D             v rZt          | d          rJt          | j        t
                    r0t          | di           5  d V  d d d            d S # 1 swxY w Y   d S d V  d S # t          $ r d V  Y d S w xY w)NPreTrainedTokenizerBasec                     g | ]	}|j         
S ri   r   )rk   
base_classs     r3   rn   z$_no_cache_fields.<locals>.<listcomp>  s    )b)b)b***=)b)b)br5   cache)ra   r   rz   rG   r  rV   r   r  rb   s    r3   _no_cache_fieldsr    s     %)b)bPTUXPYPYPa)b)b)bbbW%% c39d++ c &c7B77                    EEEEE   s<   AB A1$B 1A55B 8A59B >B BBc                     t                      }t          |           5  t          | |           ddd           n# 1 swxY w Y   |                                S )zpickle an object to a stringN)StringIOr  r  getvaluer  s     r3   dumpsr    s    ::D	#		  S$              ==??s   ;??c                       fd}|S )Nc                 &    | t           j        <   | S r`   )rr  r  )r   ts    r3   proxyzpklregister.<locals>.proxy  s    "r5   ri   )r  r  s   ` r3   r  r    s#         Lr5   rt  c                    t           j        j                            d|            |j                            d          st          |j                            t          j	        j
                            dk    rB|j                            t          j	        j
                  d                             d          s|j        dk    rdn#t          j	                            |j                  }d}t           j        j        rt          |d          rY|j        |j        |j        |j        |j        |j        |j        |j        |j        |j        ||j        ||j        |j        |j        f}n|j        |j        |j        |j        |j        |j        |j        |j        |j        ||j        ||j        |j        |j        f}nL|j        |j        |j        |j        |j        |j        |j        |j        ||j        ||j        |j        |j        f}|                     t:          ||	           t           j        j                            d
           dS )z
        From dill._dill.save_code
        This is a modified version that removes the origin (filename + line no.)
        of functions created in notebooks or shells for example.
        zCo: <r   r@   
ipykernel_<lambda>r   co_posonlyargcountrb   # CoN)rv  rw  rx  r%  co_filenamer  r  rQ  r;  r<  sepco_namebasenamePY3rz   co_argcountr  co_kwonlyargcount
co_nlocalsco_stacksizeco_flagsco_code	co_constsco_namesco_varnames	co_lnotabco_freevarsco_cellvarsr  r   rz  rc   r  co_firstlinenor   s        r3   
_save_coder    s    	
L3LL)))& ))#..3 CO))"'+6677!;;O))"'+66r:EElSS < {j(( B !!#/22 	 :> 6	s011 $O*)N$LKMLOK"MOO!( O)N$LKMLOK"MOO&  D  	Hd444
F###r5   rb  c                    t           j        j                            | d|           |j                            d          st          |j                            t          j	        j
                            dk    rB|j                            t          j	        j
                  d                             d          s|j        dk    rdn#t          j	                            |j                  }d}t          |d          rx|j        |j        |j        |j        |j        |j        |j        |j        |j        |j        |j        ||j        |j        ||j        |j        |j        |j        |j        |j        f}nt          |d	          rl|j        |j        |j        |j        |j        |j        |j        |j        |j        |j        |j        ||j        |j        ||j        |j        |j        |j        f}n*t          |d
          r_|j        |j        |j        |j        |j        |j        |j        |j        |j        |j        |j        ||j        ||j        |j        |j        f}nt          |d          rY|j        |j        |j        |j        |j        |j        |j        |j        |j        |j        ||j        ||j        |j        |j        f}nR|j        |j        |j        |j        |j        |j        |j        |j        |j        ||j        ||j        |j        |j        f}|                      t           j        j!        ||           t           j        j                            | d           d S )NzCo: %sr  r   r@   r  r  r   co_endlinetableco_exceptiontableco_linetabler  rb   r  )"rv  rw  r$  r~  r  r  r  rQ  r;  r<  r  r  r  rz   r  r  r  r  r  r  r  r  r  r  r  co_qualnamer  r  co_columntabler  r  r  r  _create_coder  s        r3   	save_coder  E  sG   
3777, ))#..3 CO))"'+6677!;;O))"'+66r:EElSS < {j(( B !!#/22 	  3)** f	&%  #"%+DD. S-.. N	&%  %'DD* S.)) 8	&%  #DD& S.// $	&% !DD( % D$ 	DJ3TsCCC
000r5   z0.3.5c           
      J   t           j                            |          st           j        j                            d|            t          | dd          r_t           j        j        } ||dd          }t          |          t           j        j	        v rt           j        j
        r|j        n|j        }nt           j        j
        r|j        n|j        }t          t          |                                                    }t          | dd          }t          | dd          }t          |          t           j        j	        v o|du}t!          t           j        j	                  |ft           j        j	        t          |          <   t           j        j
        rdt          |j        d	d
          v o|du}|rd| _        |rd| _        t          |dd          }|                     t           j        j        |j        ||j        |j        |j        |j        |f|           ndt          |j        d	d
          v o|duot          | dd          }|rd| _        |rd| _        |                     t           j        j        |j        ||j        |j        |j        |j        f|           |r|| _        |r|| _        t           j        j        r"|s |s
|s|s|s|s|r|                                  t           j        j                            d           nt           j        j                            d|            t          |dt          |dd                    }	t           j        j         !                    | ||	           t           j        j                            d           dS )z
        From dill._dill.save_function
        This is a modified version that make globs deterministic since the order of
        the keys in the output dictionary of globalvars can change.
        zF1: _recurseFTr  builtin_byrefNr   r  ri   __kwdefaults__rb   # F1zF2: r   r   r0   # F2)"rv  rw  _locate_functionrx  r%  rt   detect
globalvarsidstackr  __globals__func_globalsrV   sortedr{   r  __code__r  r  r  _create_functionr   __defaults____closure____dict__	func_code	func_namefunc_defaultsfunc_closureOLDER
clear_memoStockPicklersave_global)
rz  rc   r  globsr  r  _memo_superfkwdefaultsr0   s
             r3   save_functionr    s    z**3// ?	(JNs---w
E22 P![3
"
3dCCCc77dj.../3z~SCOO3CSE+/:>Os?O ..//EWh55Fw
D99HWW
 00Lxt7KE(+DJ,<(=(=s(BDJRWW%z~ !WS\:r%J%JJdQW_cQc *%)GN -',G$%c+;TBB##J/\5#,8H#/[^[gitu $     z2 F FF <t+<U;; 
  *%)GN -',G$##J/]E3=#:KSM]_b_kl $   
  (!' ,#+ 
 %% % %+% 05% ?E% NS% X`%
 ""$$$JN''''JNs---3Z0N0NOODJ#//4/HHHJN'''r5   c           	      H   t           j                            ||           st           j        j                            d|z             t          | dd           }t          | dd           }t          | dd           }t          | dt           j        j                  }g }|rddlm}  ||dd	          }d
|j	        i}	nt           j        j
        r|j        n|j        }|r"||j        u rt          | d|          j        }|}	nK|@|j	        9t          t           j                            |j	        d          dd           |u r|}	n	d
|j	        i}	|	|u }
t          t!          |	                                                    }	|
r|	}n0|.t          t!          |                                                    }||	|urt           j        j
        rd |                                D             }nd |                                D             }|D ]6}||v r0||                             t           j        j        |	|ff            n)7|                    t           j        j        |	|ff           t           j        j
        r|j        }i }dD ]}t          ||d           }||||<   |j        |j        k    r
|j        |d<   d
|	vs|j	        |	d
         k    r
|j	        |d<   |j        }t3          |          t          ur||d<   d }|r||f}t           j                            | t           j        j        |j        |	|j        |j        |f|f||           n|j        }|j        $|                    t@          |d|j        ff           d
|	vs|j	        |	d
         k    r$|                    t@          |d|j	        ff           |j        r$|                    t@          |d|j        ff           t           j                            | t           j        j        |j!        |	|j"        |j#        |ff||           |rtI          tK          |                                          d           }|r|r|D ]}t@          |d|ff}	 |&                    |           n# tN          $ r Y 1w xY w | j(        |  t           j        j
        r$| )                    tU          dd                     t| )                    d           t           j        j                            d           nt           j        j                            d|z             t          |dt          |d
d                     }t           j        j+        ,                    | ||           t           j        j                            d           d S )NF1: %sr  	_postproc_main_modified_original_mainr   r  Tr  r   _mainr  c                 ,    h | ]}t          |          S ri   r  rk   gs     r3   	<setcomp>z save_function.<locals>.<setcomp>`  s    CCC!1CCCr5   c                 ,    h | ]}t          |          S ri   r  r  s     r3   r  z save_function.<locals>.<setcomp>b  s    GGG!1GGGr5   r   r  __annotations__r   r   rc   postproc_listr   cell_contents0UTF-8r  F2: %sr  r  )-rv  rw  r  rx  r%  rt   __builtin__dill.detectr  r   r  r  r  r  _import_modulerV   r  r{   rZ   
itervaluesr  	_setitemsr  r   r   ra   _save_with_postprocr  r  r  r  r   r   r  r   r  nextiterremoverJ   r  writebytesr  r  )rz  rc   r  r  r  r  r  r  
globs_copyr  globs_is_globs_copyglob_idsstack_elementclosure
state_dict	fattrnamefattrstatetopmost_postproccellpossible_postprocr0   s                         r3   r  r  %  s   z**388 H	(JN3///w
D99Hd;;I$W.>EEN$W.>
@VWWNM 9222222'ZT4HHH
 $S^404
TS__CDT
 " 9jN4K&K&K!('>!J!J!SJ&EE *2
 9 9#.$ O OQ[]abbfppp&EE'8E #(:"5..//E" >"

'!&)9)9););"<"<==
 %%z*A*A :> HCCz/@/@/B/BCCCHHGGz/D/D/F/FGGGH%. V VM$00!-0779MPUWaOb8cddd 1 "(($**>
@S)TUUUz~ +/
!Q 6 6I#CD99E(05
9-#s|33141AJ~.U**cnj@Q.Q.Q/2~J|,;;d**-2Jz* E .!:-E
..
3uclC<LgV
 "/ / 	 	 	 	 *;*!(('CCK3P)QRRRU**cnj@Q.Q.Q!(('Cs~3V)WXXX< U!(('CS\3R)STTT
..Z03=%X[Xikr2st"/	 /     /#'Y-=-=-?-?(@(@$#G#G  // / ' / /-4t_c6R,S)%,334EFFFF) % % %$H% ,+->??:> /#MM%W*=*=>>>>#MM#....JN''''JN3///3Z0N0NOODJ#//4/HHHJN'''s   0R
RRc           	      
   t           j                            ||           st          |j                  t
          ur	t          |dd           }|t           j        j        j        }t           j        	                    |d          }d}	 t           j        
                    ||j                  \  }}t          |dd           |u rd}n# t          $ r Y nw xY w|rlt           j        j                            | d|           |                     t          |df|           t           j        j                            | d           d S t           j        j                            | d	|           t          | d
d           }t          | dd           }t          | dd           }	t          | dt           j        j                  }
g }|rddlm}  ||dd          }d|j        i}nv|j        }|	r"||
j        u rt          | d|
          j        }|}nK|@|j        9t          t           j        	                    |j        d          dd           |u r|}n	d|j        i}||u }t+          t-          |                                                    }|r|}n0|.t+          t-          |                                                    }|||urd |                                D             }|D ]6}||v r0||                             t           j        j        ||ff            n)7|                    t           j        j        ||ff           |j        }i }dD ]}t          ||d           }||||<   |j        |j        k    r
|j        |d<   d|vs|j        |d         k    r
|j        |d<   |j        }t          |          t*          ur||d<   d }|r||f}t           j                            | t           j        j        |j        ||j        |j        |f|f||           |rt?          tA          |                                          d           }|rg|re|D ]b}tB          |d|ff}	 |"                    |           n# tF          $ r Y 1w xY w | j        |  | $                    tK          dd                     ct           j        j                            | d           nt           j        j                            | d|           t          |dt          |dd                     }t           j        j&        '                    | ||           t           j        j                            | d           d S )Nr   T)safeF__func__zF3: %srb   z# F3r  r  r  r  r  r   r  r  r   r  r  c                 ,    h | ]}t          |          S ri   r  r  s     r3   r  z save_function.<locals>.<setcomp>  s    ???aBqEE???r5   r  r   r  r  r  r  r  r   r  r  )(rv  rw  r  ra   r  r   rt   r!  r   r#  _getattributer   AttributeErrorr$  r~  r  r"  r  r   r  r  rV   r  r{   rZ   r  r%  r  r&  r  r  r'  r(  r   r)  rJ   r*  r+  r  r  )rz  rc   module_namemodule_pypy_builtinfound_r  r  r  r  r  r  r,  r  r-  r.  r/  r0  r1  r2  r3  r4  r5  r6  r7  r0   s                              r3   r  r    s   z**388 E	5CL!!11 &c<>>&"&*"8"AK22;T2JJ %#z77@PQQHE1uj$773>>(,%   D ! J%++GXsCCC''%1D#'NNNJ%++GV<<<FJ##GXs;;;w
D99Hd;;I$W.>EEN$W.>
@VWWNM 9222222'ZT4HHH
 $S^4 _
 " 9jN4K&K&K!('>!J!J!SJ&EE *2
 9 9#.$ O OQ[]abbfppp&EE'8E #(:"5..//E" >"

'!&)9)9););"<"<==
 %%z*A*A @?:+<+<+>+>???%. V VM$00!-0779MPUWaOb8cddd 1 "(($**>
@S)TUUUoGJM 2 2	Y55$,1Jy)3<//-0-=
>*&&#.E*<M*M*M+.>
<(LEE{{$&&).
:& *z)J**,s|UCLRURbdk.lnst+	 +     ;#'Y-=-=-?-?(@(@$#G#G  ;/ ; ' 
; 
;-4t_c6R,S)%,334EFFFF) % % %$H% ,+->??eC&9&9::::J##GV4444J##GXs;;;3Z0N0NOODJ#//4/HHHJ##GV444s$   =C	 	
CCQ22
Q?>Q?c                     t          j        | j        | j        | j        | j        | j                  }| j        |_        |S r`   )r   r   r  r  r   r  r  r  )r   rY   s     r3   copyfuncrD  ?  s9    t/?PTPacgcsttF /FMr5   Yqueuer   .r   c                 \    t           |di |          D ]\  }} | j        |           |S )Nri   )r   put)rF  r   r   r   rY   s        r3   _write_generator_to_queuerI  H  sC    tt~~f~~..  	6	&Hr5   r-  kwargs_iterablec             #      K   t           t          j        j                  rt          nt
          j        } |            5 }|                                 fd|D             }	 	 	  j        d          V  n:# t          $ r- t          d |D                       r j
                    rY nY nw xY wP	 d |D              n# d |D              w xY w	 d d d            d S # 1 swxY w Y   d S )Nc                 L    g | ] }                     t          |f          !S ri   )apply_asyncrI  )rk   r   r   r-  rF  s     r3   rn   z&iflatmap_unordered.<locals>.<listcomp>W  s>     
 
 
SYD6f8MNN
 
 
r5   T皙?timeoutc              3   >   K   | ]}|                                 V  d S r`   )readyrk   async_results     r3   rp   z%iflatmap_unordered.<locals>.<genexpr>_  s.      RRL<--//RRRRRRr5   c                 :    g | ]}|                     d           S )rN  rO  )rx   rS  s     r3   rn   z&iflatmap_unordered.<locals>.<listcomp>c  s)    NNN\d++NNNr5   )rG   multiprocessingr-  r   r   multiprocessQueuerx   r
   allempty)r-  r   rJ  manager_clsmanagerasync_resultsrF  s   ``    @r3   iflatmap_unorderedr^  N  s      (o.B.GHHb''lNbK	 O'
 
 
 
 
 
]l
 
 
		O#%)D1111111   RRMRRRRR WbW\WbWdWd 
  ONNNNNNNNNNNNNNO O O O O O O O O O O O O O O O O OsN   %C&(C*A>=C>2B50C4B55C:C&CC&&C*-C*)FF)	FTFFNr   NTN)r9  )er   r|   	functoolsr   multiprocessing.poolrV  r;  rF  rP   r   
contextlibr   dataclassesr   r   ior   r  r   r   r	   r
   shutilr   r   r   typingr   r   r   r   r   r   r   r   r   urllib.parser   rv  rW  multiprocess.poolr  r   	packagingr   	tqdm.autor   r   r   r   typing_extensions_typing_extensionsr   r   r  
get_loggerr   r$  	lru_cacher  r4   r-   r  rL   r]   r   r   r   r   r   r   r   rV   r   propertyr   r   boolry   r.  r0  rC  rY  rp  rr  r  r  r  r  r  r  r  r  r  r  rD  rE  rX  rI  r-  r^  ri   r5   r3   <module>rp     s                  				  				  % % % % % % , , , , , , , , " " " " " " 0 0 0 0 0 0 0 0 0 0             ( ( ( ( ( ( ( ( W W W W W W W W W W W W W W W W W W W W W W ! ! ! ! ! !                                     02222000000000 0 0 0+///5550 
	H	%	% 
) ) )0)s5c? )ss )s )s )s )sX3  c3h    B$ $ $N % % % 2, 2,C 2, 2, 2, 2,j      0 0 0% % % % %T % % %82 2 2 2 2H 2 2 2( ( (F " !p$ p$ucz"p$p$ p$ 	p$
 p$ p$ smp$ p$ E?p$ p$ 3-p$ 	p$ p$ p$ p$f       % % % %## #%Xc]0B*C # # # #&F3 F5c3);#< F F F FR_, _, _, _, _,dl _, _, _,D           
w////[\ \ \ \|  !$g(>(>(FFF[O O Od 
w////[F F F FP  !$g(>(>(FFF[J J J JX  !$g(>(>(FFF[G G GT   GCLLU[ hqkAQ8R \` eh    O
$)<+<+AA
BO
3#
$O d^	O
 a[O O O O O Os   B' 'B54B5