
    +gd                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d Z G d de          ZdS )    N)ArgumentParser)Path)copyfile)List)config)DatasetBuilder)BaseDatasetsCLICommand)DownloadConfig)DownloadMode)dataset_module_factoryimport_main_class)VerificationModec                     t          | j        | j        | j        | j        | j        | j        | j        p| j        | j	        | j
        f	i |S N)RunBeamCommanddatasetname	cache_dirbeam_pipeline_optionsdata_dirall_configs	save_info
save_infosignore_verificationsforce_redownload)argskwargss     :lib/python3.11/site-packages/datasets/commands/run_beam.pyrun_beam_command_factoryr      s[    	")$/!        c                   \    e Zd Zedefd            Zdedededededed	ed
edefdZd Z	dS )r   parserc                    |                      dd          }|                    dt          d           |                    dt          d d	           |                    d
t          d d	           |                    dt          dd	           |                    dt          d d	           |                    ddd           |                    ddd           |                    ddd           |                    ddd           |                    ddd           |                    t                     d S )Nrun_beamz&Run a Beam dataset processing pipeline)helpr   zName of the dataset to download)typer%   z--namezDataset config name)r&   defaultr%   z--cache_dirz-Cache directory where the datasets are storedz--beam_pipeline_options zrBeam pipeline options, separated by commas. Example:: `--beam_pipeline_options=job_name=my-job,project=my-project`z
--data_dirz?Can be used to specify a manual directory to get the files fromz--all_configs
store_truezTest all dataset configurations)actionr%   z--save_infozSave the dataset infos filez--ignore_verificationsz0Run the test without checksums and splits checksz--force_redownloadzForce dataset redownloadz--save_infoszalias for save_info)func)
add_parseradd_argumentstrset_defaultsr   )r"   run_beam_parsers     r   register_subcommandz"RunBeamCommand.register_subcommand    s    ++J=e+ff$$YS?`$aaa$$XCLa$bbb$$@	 	% 	
 	
 	
 	$$% F	 	% 	
 	
 	
 	$$R	 	% 	
 	
 	
 	$$_\Pq$rrr$$]<Nk$lll$$$\@r 	% 	
 	
 	
 	$$%9,Uo$ppp$$^LOd$eee$$*B$CCCCCr    r   r   r   r   r   r   r   r   r   c
                     || _         || _        || _        || _        || _        || _        || _        || _        |	| _        |
| _	        d S r   )
_dataset_name
_cache_dir_beam_pipeline_options	_data_dir_all_configs_save_infos_ignore_verifications_force_redownload_config_kwargs)selfr   r   r   r   r   r   r   r   r   config_kwargss              r   __init__zRunBeamCommand.__init__A   sV      
#&;#!'%%9"!1+r    c                    dd l }| j        %| j        rt          d           t	          d           | j        | j        }}t          |          }t          |j                  }g }| j	        rC|j
        j                            d | j	                            d          D                       }nd }| j        rst          |j                  dk    r[|j        D ]R}|                     ||j        | j        |j        || j        |j                            d                               SnH|                     |d|| j        || j        |j                            d          d	| j                   |D ]}	|	                    | j        st4          j        nt4          j        t;          t<          j        
          | j         rtB          j"        ntB          j#        d           | j$        r|	$                                 t          d           | j$        rCtJ          j&        '                    |(                                t<          j)                  }
tU          |          j        dz   }tJ          j&        '                    ||          }tJ          j&        +                    |          r tJ          j&        ,                    |          }nCtJ          j&        +                    |          r|}n!t          d|
            t	          d           tJ          j&        '                    |t<          j)                  }t[          |
|           t          d|            d S d S )Nr   z?Both parameters `name` and `all_configs` can't be used at once.   c                 @    g | ]}|d |                                  S )z--)strip).0opts     r   
<listcomp>z&RunBeamCommand.run.<locals>.<listcomp>e   s/    cccc_bc)CIIKK))cccr    ,)flags	base_path)config_namer   hashbeam_optionsr   rI   )rJ   r   rL   r   rI   )r   F)download_modedownload_configverification_modetry_from_hf_gcszApache beam run successful.z.pyzDataset Infos file saved at  ).apache_beamr4   r8   printexitr3   r   r   module_pathr6   optionspipeline_optionsPipelineOptionssplitlenBUILDER_CONFIGSappendr   r7   rK   r5   builder_kwargsgetr<   download_and_preparer;   r   REUSE_CACHE_IF_EXISTSFORCE_REDOWNLOADr
   r   DOWNLOADED_DATASETS_PATHr:   r   	NO_CHECKS
ALL_CHECKSr9   ospathjoinget_imported_module_dirDATASETDICT_INFOS_FILENAMEr   isfiledirnamer   )r=   beamrf   rJ   dataset_modulebuilder_clsbuildersrL   builder_configbuilderdataset_infos_pathr   combined_pathdataset_diruser_dataset_infos_paths                  r   runzRunBeamCommand.runY   st   """":!d&7!STTTGGG M4:k/55'(BCC)+& 	 <8HHccT5P5V5VWZ5[5[ccc I  LL  L 	[%@!A!AA!E!E"-"= 
 
K$2$7!%+0%1"&/"0"?"C"CK"P"P  	 	 	 	
 OO  +!^!-"o,;??LL  ) 	 	 	   	& 	&G((-3l@@!2 .9X Y Y Y-#1"2"<"<%0 % ) 	 	 	  &##%%%+,,,
  	L!#k.Q.Q.S.SU[Uv!w!w::?U*DGLLt44Mw~~d##  good33.. "I5GIIJJJQ ')gll;@a&b&b#')@AAAJ1HJJKKKKK#	L 	Lr    N)
__name__
__module____qualname__staticmethodr   r1   r.   boolr?   rv   rQ   r    r   r   r      s        DN D D D \D@,, , 	,
  #, , , , #, , , , ,0LL LL LL LL LLr    r   )re   argparser   pathlibr   shutilr   typingr   datasetsr   datasets.builderr   datasets.commandsr	   !datasets.download.download_configr
   "datasets.download.download_managerr   datasets.loadr   r   datasets.utils.info_utilsr   r   r   rQ   r    r   <module>r      s2   				 # # # # # #                         + + + + + + 4 4 4 4 4 4 < < < < < < ; ; ; ; ; ; C C C C C C C C 6 6 6 6 6 6  FL FL FL FL FL+ FL FL FL FL FLr    