
    +gd                         d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 dZ
dZg dZg dZd	efd
Z G d de          ZdS )    N)ArgumentParser	Namespace)BaseDatasetsCLICommand)
get_loggerz><<<<<<< This should probably be modified because it mentions: z=======
>>>>>>>
)TextEncoderConfigByteTextEncoderSubwordTextEncoderencoder_configmaybe_build_from_corpus
manual_dir))z
tfds\.coredatasets)ztf\.io\.gfile\.GFileopen)ztf\.([\w\d]+)zdatasets.Value('\1'))ztfds\.features\.Text\(\)zdatasets.Value('string'))ztfds\.features\.Text\(zdatasets.Value('string'),)z+features\s*=\s*tfds.features.FeaturesDict\(zfeatures=datasets.Features()ztfds\.features\.FeaturesDict\(zdict()zThe TensorFlow Datasets AuthorszDThe TensorFlow Datasets Authors and the HuggingFace Datasets Authors)ztfds\.z	datasets.)zdl_manager\.manual_dirzself.config.data_dir)zself\.builder_configzself.configargsc                 6    t          | j        | j                  S )zz
    Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint.

    Returns: ConvertCommand
    )ConvertCommand	tfds_pathdatasets_directory)r   s    9lib/python3.11/site-packages/datasets/commands/convert.pyconvert_command_factoryr   *   s     $.$*ABBB    c                   @    e Zd Zedefd            ZdedefdZd ZdS )r   parserc                     |                      dd          }|                    dt          dd           |                    dt          dd	           |                    t          
           dS )z
        Register this command to argparse so it's available for the datasets-cli

        Args:
            parser: Root parser to register command-specific arguments
        convertzHConvert a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.)helpz--tfds_pathTzQPath to a TensorFlow Datasets folder to convert or a single tfds file to convert.)typerequiredr   z--datasets_directoryz(Path to the HuggingFace Datasets folder.)funcN)
add_parseradd_argumentstrset_defaultsr   )r   train_parsers     r   register_subcommandz"ConvertCommand.register_subcommand4   s     (([ ) 
 
 	!!d	 	" 	
 	
 	
 	!!"tBl 	" 	
 	
 	
 	!!'>!?????r   r   r   c                 J    t          d          | _        || _        || _        d S )Nzdatasets-cli/converting)r   _logger
_tfds_path_datasets_directory)selfr   r   r   s       r   __init__zConvertCommand.__init__K   s'    !";<<##5   r   c                 *   t           j                            | j                  r%t           j                            | j                  }nXt           j                            | j                  r%t           j                            | j                  }nt          d          t           j                            | j                  }| j	        
                    d| d|            g }g }i }t           j                            | j                  rt          j        |          }n%t           j                            | j                  g}|D ]}| j	        
                    d|            t           j                            ||          }t           j                            ||          }	t           j                            |          rd|v sd|v sd|vr| j	        
                    d           t          |d	
          5 }
|
                                }d d d            n# 1 swxY w Y   g }d}d}g }|D ]}|dv r
dv rdv rdv rdndv rd"dv rdndv r                    dd          nt#          fdt$          D                       rd}t'          t)          fdt$                              }|                    t,          t/          |          z   dz              |                               |                    t0                     t2          D ]\  }}t5          j        ||          dv rrt5          j        d          }|                    d |                    d                              d          D                        d |                    d          z   d!v sd"v sdv r$t          d#                                            d$v sd%v rd}|                               |sd&|v r|                    dd          }t           j                            ||          t           j                            |          }	t          j!        d'           | j	        
                    d(            |"                    fd)|D                        n|                    |	           |r|                    |	           t          |	d*d	
          5 }
|
#                    |           d d d            n# 1 swxY w Y   | j	        
                    d+|	            |D ]}	 t           j                            |          }||                    dd                   }| j	        
                    d,| d|            tI          j%        ||           t# tL          $ r! | j	        '                    d-| d.           Y w xY w|r#|D ]"}| j	        (                    d/| d0           !d S d S )1NzA--tfds_path is neither a directory nor a file. Please check path.zConverting datasets from z to zLooking at file r*   _testz.pyzSkipping filezutf-8)encodingFz!import tensorflow.compat.v2 as tfz
@tfds.corezbuilder=selfz-import tensorflow_datasets.public_api as tfdszimport datasets
zimport tensorflow zfrom absl import loggingzfrom datasets import logging
	getLoggerr   c              3       K   | ]}|v V  	d S N ).0
expressionout_lines     r   	<genexpr>z%ConvertCommand.run.<locals>.<genexpr>   s(      OOJx/OOOOOOr   Tc                     | v S r1   r2   )er5   s    r   <lambda>z$ConvertCommand.run.<locals>.<lambda>   s    a8m r   
tensorflow_datasetsz/from\stensorflow_datasets.*import\s([^\.\r\n]+)c              3   >   K   | ]}|                                 V  d S r1   )strip)r3   imps     r   r6   z%ConvertCommand.run.<locals>.<genexpr>   s*      'Y'Y		'Y'Y'Y'Y'Y'Yr      ,zfrom . import ztf.ztfds.zError converting GeneratorBasedBuilderBeamBasedBuilderwmt)exist_okzAdding directory c                     i | ]}|S r2   r2   )r3   r>   
output_dirs     r   
<dictcomp>z&ConvertCommand.run.<locals>.<dictcomp>   s    .W.W.W3sJ.W.W.Wr   wzConverted in zMoving z#Cannot find destination folder for z. Please copy manually.z!You need to manually update file z4 to remove configurations using 'TextEncoderConfig'.))ospathisdirr'   abspathisfiledirname
ValueErrorr(   r&   infolistdirbasenamejoinr   	readlinesreplaceanyTO_HIGHLIGHTlistfilterappendHIGHLIGHT_MESSAGE_PREr!   HIGHLIGHT_MESSAGE_POST
TO_CONVERTresubmatchextendgroupsplitr=   makedirsupdate
writelinesshutilcopyKeyErrorerrorwarning)r)   abs_tfds_pathabs_datasets_pathutils_fileswith_manual_updateimports_to_builder_map
file_namesf_name
input_fileoutput_fileflines	out_lines
is_builderneeds_manual_updatetfds_importsline	to_removepatternreplacementr`   dir_name
utils_filedest_folder	file_pathr5   rF   s                            @@r   runzConvertCommand.runQ   s   7==)) 	bGOODO<<MMW^^DO,, 	bGOODO<<MM`aaaGOOD,DEE\m\\IZ\\]]]!#7==)) 	=M22JJ'**4?;;<J  N	= N	=FL999:::mV<<J',,'8&AAK7>>*-- v1E1ETZIZIZ^ckq^q^q!!/222j7333 &q& & & & & & & & & & & & & & & IJ"'L ++ ++ 7(BB!X--#x//DPP2HH(H44!H/8;;?HH H,,'//\JJHHOOOO,OOOOO 	J*.' $V,C,C,C,C\%R%R S SI$$%:S^^%Kd%RSSS$$X...$$%;<<<0: J J,#%6';#I#I )H44H%WYabbE '''Y'Yu{{1~~?S?STW?X?X'Y'Y'YYYY/%++a..@H H$$8(;(;?TX`?`?`$%K9I9I%K%KLLL*h66:LPX:X:X!%J  **** 
0Uf__!>>%44W\\*;XFF
 gll:v>>J6666!!"Bj"B"BCCC&--.W.W.W.W,.W.W.WXXXX "";///" 7"))+666k3999 (QY'''( ( ( ( ( ( ( ( ( ( ( ( ( ( (L;k;;<<<<% 	n 	nJn))*554V^^E25N5NO!!"IK"I"IZ"I"IJJJJ4444 n n n""#l#l#l#lmmmmmn  	/  	$$w	www   	 	 s7   H&&H*	-H*	<TT"	%T"	A0V>>(W)(W)N)	__name__
__module____qualname__staticmethodr   r$   r!   r*   r   r2   r   r   r   r   3   st        @N @ @ @ \@,6# 63 6 6 6 6r r r r rr   r   )rI   r^   rg   argparser   r   datasets.commandsr   datasets.utils.loggingr   r[   r\   rW   r]   r   r   r2   r   r   <module>r      s    				 				  . . . . . . . . 4 4 4 4 4 4 - - - - - - ]      
"C) C C C CP P P P P+ P P P P Pr   