U
    ėžŌdØ5  ć                   @   s   d Z ddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
Z
ddlZddlmZ ddlmZ ddlmZmZ e e”ZG dd dZdS )	a  
.. module:: BuscoDownloadManager
   :synopsis: BuscoDownloadManager manage the version and download the most recent file
.. versionadded:: 4.0.0
.. versionchanged:: 5.4.0

Copyright (c) 2016-2023, Evgeny Zdobnov (ez@ezlab.org)
Licensed under the MIT license. See LICENSE.md file.

é    N)ŚURLError)ŚBuscoLogger)ŚLogDecorator)ŚBatchFatalErrorŚ
BuscoErrorc                   @   s   e Zd ZdZdZdd Zdd Zededd	 Z	d
d Z
edd Zdd Zdd Zedd Zededddd Zedd Zededddd ZdS )ŚBuscoDownloadManagera3  
    This class obtains and manages the version of data files required to run a BUSCO analysis.
    When the config parameter `offline` is set, no attempt to download is made.
    When the config parameter `auto_update_files` is set, new versions replace old versions.s
    Else, a warning is produced.
    Nc              
   C   s¬   |  dd”| _|  dd”| _| dd”| _| dd”| _|  ”  t| jdkrØ| jsØz|  	”  W nF t
k
r¦ } z(d| _t d |j| j”” |  	”  W 5 d}~X Y nX dS )	zz
        :param config: Values of all parameters to be used during the analysis
        :type config: BuscoConfig
        Z	busco_runŚofflinezupdate-dataŚdownload_base_urlZdownload_pathNz&https://busco-data2.ezlab.org/v5/data/z{}. Retrying with backup url {})Z
getbooleanr   Śupdate_dataŚgetr	   Ślocal_download_pathŚ_create_main_download_dirŚtypeŚversion_filesŚ_obtain_versions_filer   ŚloggerŚinfoŚformatŚvalue)ŚselfZconfigZbfe© r   ś9lib/python3.8/site-packages/busco/BuscoDownloadManager.pyŚ__init__)   s"     ’’zBuscoDownloadManager.__init__c                 C   s"   t j | j”st j| jdd d S ©NT)Śexist_ok)ŚosŚpathŚexistsr   Śmakedirs)r   r   r   r   r   ?   s    z.BuscoDownloadManager._create_main_download_dirz;Downloading information on latest versions of BUSCO data...c              	   C   sŽ   t j | jd”}t j | jd”}dD ]“}z4tj ||” tj	|ddddddgdd	t
| _W q$ tk
r~   td
 |”Y q$ tk
rÖ   | jr¤t d” Y  qŚn.|rÄt d |”” t |” ntd |”Y q$X q$d S )Nzfile_versions.tsv©é
   éd   Nś	ZdatasetŚdateŚhashŚdomainr   )ŚsepŚnamesZ	index_colzPInvalid URL. Cannot reach {}. Please provide a valid URL for --base_download_urlz7Unable to verify BUSCO datasets because of offline modeś3Download connection problem. Retrying in {} secondsśCannot reach {})r   r   Śjoinr	   r   ŚurllibŚrequestŚurlretrieveŚpdZread_csvr   r   Ś
ValueErrorr   r   r   r   r   Śwarningr   ŚtimeŚsleep)r   Śremote_filepathŚlocal_filepathŚtsleepr   r   r   r   D   s@    ü’’’’’z*BuscoDownloadManager._obtain_versions_filec                 C   s.   t j | j|”}t j |”s*t j|dd d S r   )r   r   r*   r   r   r   )r   ŚcategoryZcategory_folderr   r   r   Ś_create_category_dirj   s    z)BuscoDownloadManager._create_category_dirc              	   C   sZ   d }t | d6}|D ]*}| ”  d”}|d dkr|d } q@qW 5 Q R X |sVtd|S )NŚrś=r   Zcreation_dateé   z;Creation date could not be extracted from dataset.cfg file.)ŚopenŚstripŚsplitr   )Zdataset_config_fileŚdataset_dateZdata_configŚliner   r   r   Ś_extract_creation_dater   s    ’z+BuscoDownloadManager._extract_creation_datec              	   C   s2  zt | jj| d }W n$ tk
r<   td ||”Y nX tj |”\}}|dkrØd 	||g”}z$|  
tj 	|d””}||k}	d}
W n tk
r¤   d}	d}
Y nX nhd 	||| d”g”}| ||”}tj |”}	tj |”\}}tt d |d	d
 |dd  ””d	k}
t | jj| d }|
|	|||fS )Nr#   z!{} is not a valid option for '{}'ŚlineagesŚ.zdataset.cfgTFz{}.*.{}r   iõ’’’r:   r$   )r   r   ŚlocŚKeyErrorr   r   r   r   Śsplitextr*   r@   ŚFileNotFoundErrorŚlstripŚreplacer   ŚlenŚglob)r   r4   r6   Śdata_basenameZlatest_updateZpath_basenameŚ	extensionŚlatest_versionr>   Ś
up_to_dateŚpresentZpath_to_checkZextension_to_checkr$   r   r   r   Ś_check_existing_version   sD    
’
’’’’ū’	z,BuscoDownloadManager._check_existing_versionc                 C   sō  t j |”r*d|kr*t d ||”” |S d|kr@td |”| jrŲ|dkrt j | j	||”}t j |”rp|S td |”nXt j 
|”\}}tt t j | j	|d ||”””}t|dkrČ|d S td	 | j	”t j |”}t j | j	||”}|  |||”\}	}
}}}|
s| js"|	sŚ|  |” d
}t j | j||| ”}|	r`|dkr`|  |” dD ]j}|  ||| |”}|r¬|  || ”}|	r¤t d |””  qšn |rdt d |”” t |” qdtdn|
sšt d |”” |S )Nś/zUsing local {} directory {}z{} does not existrA   z?Unable to run BUSCO in offline mode. Dataset {} does not exist.z{}.*{}r   é’’’’zWUnable to run BUSCO placer in offline mode. Cannot find necessary placement files in {}z.tar.gzr   z0The file or folder {} was updated automatically.r(   zVFile download unsuccessful. Check the download url or consider running in offline modezThe file or folder {} is not the last available version. To update all data files to the last version, add the parameter --update-data in your next run.)r   r   r   r   r   r   r   r   r*   r   rE   ŚsortedrJ   rI   ŚbasenamerP   r
   r7   r	   Ś_rename_old_versionŚ_download_fileŚ_decompress_filer0   r1   r2   )r   Z	data_namer6   Zlocal_datasetrT   rL   Zplacement_filesrK   r4   rO   rN   rM   r$   Zcompression_extensionr3   r5   Zdownload_successr   r   r   r   Ŗ   s²      ’’’
ż’’	’’ś
  ’
  ’’’’’’’ž’zBuscoDownloadManager.getc                 C   s   t j | ”rz(t  | d | ”” t d | | ”” W n^ tk
r   z4t ” }t  | d | |”” t d | | |”” W n tk
r   Y nX Y nX d S )Nz{}.oldzRenaming {} into {}.oldz	{}.old.{}zRenaming {} into {}.old.{})	r   r   r   Śrenamer   r   r   ŚOSErrorr1   )r4   Z	timestampr   r   r   rU     s.    
’ 
’  ’’z(BuscoDownloadManager._rename_old_versionzDownloading file {}r:   )Zfunc_argc                 C   s¢   zvt j ||” t|  |”}||krdt d t|t|”” t 	d |”” t
 |” tdnt d |”” W n& tk
r   t d |”” Y dS X dS )Nz+md5 hash is incorrect: {} while {} expectedzdeleting corrupted file {}z:BUSCO was unable to download or update all necessary fileszmd5 hash is {}r)   FT)r+   r,   r-   r   Ś_md5r   Śerrorr   Śstrr   r   Śremover   Śdebugr   )r   r3   r4   Zexpected_hashZobserved_hashr   r   r   rV     s(     ’’
’z#BuscoDownloadManager._download_filec              	      sH   t  ” }t| d( t fdddD ]}| |” q&W 5 Q R X | ” S )NŚrbc                      s
      d”S )Ni   )Śreadr   ©Śfr   r   Ś<lambda>8  ó    z+BuscoDownloadManager._md5.<locals>.<lambda>rd   )ŚhashlibZmd5r;   ŚiterŚupdateZ	hexdigest)ŚfnameZhash_md5Śchunkr   ra   r   rZ   4  s
    zBuscoDownloadManager._md5zDecompressing file {}c              
   C   s  |  dd”}tj |”d dkrt tj |”d”0}t|d}|D ]}| |” qFW 5 Q R X W 5 Q R X zt |” W n t	k
r   t
 d” Y nX |}tj |”d dkr|  dd”}t |”}| tj |”” W 5 Q R X zt |” W n  t	k
r   t
 d” Y nX |}|S )	Nz.gzŚ r:   r_   ŚwbzGDownloaded gzip file was removed before this BUSCO run could remove it.z.tarzEDownloaded tarball was removed before this BUSCO run could remove it.)rH   r   r   rE   Śgzipr;   r*   Śwriter]   rF   r   r0   ŚtarfileŚ
extractallŚdirname)r   r4   Zunzipped_filenameZcompressed_fileZdecompressed_filer?   Zuntarred_filenameZtar_filer   r   r   rW   <  s2     ’
’
z%BuscoDownloadManager._decompress_file)Ś__name__Ś
__module__Ś__qualname__Ś__doc__r   r   r   Ślogr   r   r7   Śstaticmethodr@   rP   r   rU   rV   rZ   rW   r   r   r   r   r      s&   
%
)\


r   )rt   r   r1   rJ   rn   re   Zurllib.requestr+   Zurllib.errorr   rl   Zpandasr.   Zbusco.BuscoLoggerr   r   ru   Zbusco.Exceptionsr   r   Z
get_loggerrq   r   r   r   r   r   r   Ś<module>   s   
