
    +gd                         d Z ddlZddlZddlZ ej        d          Z ej        d          Z ej        d          Z ej        d          ZdZ	dZ
d	 Zd
 Zd Zd ZddZddZdS )zUtilities for file names.    Nz([A-Z]+)([A-Z][a-z])z([a-z\d])([A-Z])z(?<!_)_(?!_)z(_{2,})z^\w+(\.\w+)*$z<>:/\|?*c                     t                               d|           } t                              d|           } |                                 S )z(Convert camel-case string to snake-case.z\1_\2)_uppercase_uppercase_resub_lowercase_uppercase_relowernames    /lib/python3.11/site-packages/datasets/naming.pycamelcase_to_snakecaser   !   s:    "&&x66D"&&x66D::<<    c                     t                               |           } d | D             } d                    d t          j                            |           D                       S )z/Convert snake-case string to camel-case string.c                 B    g | ]}t                               |          S  )_multiple_underscores_resplit.0ns     r
   
<listcomp>z*snakecase_to_camelcase.<locals>.<listcomp>+   s'    <<<!$**1--<<<r    c              3   J   K   | ]}|d k    |                                 V  dS )r   N)
capitalizer   s     r
   	<genexpr>z)snakecase_to_camelcase.<locals>.<genexpr>,   s6      ZZaRSWYRYRY1<<>>RYRYRYRYZZr   )_single_underscore_rer   join	itertoolschainfrom_iterabler   s    r
   snakecase_to_camelcaser   (   sY     &&t,,D<<t<<<D77ZZ9?+H+H+N+NZZZZZZr   c                     t           j                            |           | k    rt          d|            t	          |           S )N&Should be a dataset name, not a path: )ospathbasename
ValueErrorr   r   s    r
   filename_prefix_for_namer&   /   sB    	w%%H$HHIII!$'''r   c                     t           j                            |           | k    rt          d|            t	          j        t          |          st          dt           d| d          t          |            d| S )Nr!   zSplit name should match 'z'' but got 'z'.-)r"   r#   r$   r%   rematch	_split_rer&   )r	   r   s     r
   filename_prefix_for_splitr,   5   s    	w%%H$HHIII8Iu%% WUYUUEUUUVVV&t,,66u666r   c                     t          | |          }|r|d| z  }t          j                            ||          }| dS )N.*)r,   r"   r#   r   )dataset_namer   data_dirfiletype_suffixprefixfilepaths         r
   filepattern_for_dataset_splitr5   =   sM    &|U;;F ('o'''w||Hf--H>>>r   c                    t          ||          t          j                            |           |r=t	          |          fdt                    D             }rfd|D             }|S }r|d z  }|gS )Nc                 (    g | ]} d |dddS )r(   05dz-of-r   )r   shard_id
num_shardsr3   s     r
   r   z/filenames_for_dataset_split.<locals>.<listcomp>K   s5    fffCCCCC:CCCfffr   c                      g | ]
}|d  z   S )r.   r   )r   filenamer2   s     r
   r   z/filenames_for_dataset_split.<locals>.<listcomp>M   s'    TTTh$9$9$99TTTr   r.   )r,   r"   r#   r   lenrange)	r#   r0   r   r2   shard_lengths	filenamesr<   r:   r3   s	      `   @@r
   filenames_for_dataset_splitrA   E   s    &|U;;FW\\$''F 
''
fffffTYZdTeTefff	 	UTTTT)TTTI 	.-O---Hzr   )N)NN)__doc__r   r"   r)   compiler   r   r   r   r+   "INVALID_WINDOWS_CHARACTERS_IN_PATHr   r   r&   r,   r5   rA   r   r   r
   <module>rE      s            				 				 %"*%<== $"*%899 "
?33 %2:j11 	%0 "  [ [ [( ( (7 7 7        r   