U
    Lei                     @   s   d dl Z d dlmZ d dlmZ d dlmZ eddgdZe jede 	 d	d
 Z
e
de jddddde jdddddde jddddde jddddde jdddd dd!d" ZdS )#    N)tqdm)word_tokenize)parallelize_preprocessz-hz--help)Zhelp_option_names)Zcontext_settingsc                   C   s   d S )N r   r   r   g/mounts/lovelace/software/anaconda3/envs/qiime2-amplicon-2024.2/lib/python3.8/site-packages/nltk/cli.pycli   s    r   tokenizez
--languagez-lenz1The language for the Punkt sentence tokenization.)defaulthelpz--preserve-lineTzIAn option to keep the preserve the sentence and not sentence tokenize it.)r
   Zis_flagr   z--processesz-j   zNo. of processes.z
--encodingz-eutf8zSpecify encoding of file.z--delimiterz-d z%Specify delimiter to join the tokens.c           	   
   C   s   t jd|d}t jd|dh}|dkrTt| D ]}t|t|d|d q4n.tt| |ddD ]}t||d|d qhW 5 Q R X W 5 Q R X d	S )
z;This command tokenizes text stream using nltk.word_tokenizestdin)encodingstdoutr   
)endfileT)progress_barN)clickZget_text_streamr   	readlinesprintjoinr   r   )	languageZpreserve_lineZ	processesr   	delimiterZfinZfoutlineoutliner   r   r   tokenize_file   s       
r   )r   r   Znltkr   Z	nltk.utilr   dictZCONTEXT_SETTINGSgroupZversion_optionr   commandoptionr   r   r   r   r   <module>   s<   
   