U
    adF                      @   s<  d dl mZ d dlZd dlZd dlZd dlZzd dlmZ W n  ek
r\   d dl	mZ Y nX zd dl
Z
W n  ek
r   d dlm
Z
 Y nX ejd  dkrd dlZd dlmZ d dlmZ eejZdd	 Zd
d Zdd Zdd Zd&ddZdd Zdd Zdd Zdd Zdd Zdd Zd'd d!Zd"d# Z d$d% Z!dS )(    )with_statementN)OrderedDict)bz2   )qconfig)
get_loggerc                 C   s  d }t j| \}}t | t js6tjd|  ddd |dkrPtj| dd}n@|dkrrt	j
| d	d}t|}n|d
krPztj| d	d}W n8 tk
r   t \}}}tjdt| dd Y nX | }t|dkrtjd|  dd t|dkrtd|d   z||d }t|}W n$ tk
rL   tjddd Y nX n@zt| }W n2 tk
r   t \}}}tj|dd Y nX |S )NzPermission denied accessing T   )Z	to_stderrexit_with_code)z.gzz.gzipZrt)mode)z.bz2z.bzip2r)z.zipzCan't open zip file: )r	   r   z Reading %s: zip archive is emptyz3Zip archive must contain exactly one file. Using %sz=Use python 2.6 or newer to work with contigs directly in zip.   )Z	exit_code)ospathsplitextaccessR_OKloggererrorgzipopenr   ZBZ2File_read_compressed_filezipfileZZipFile	Exceptionsysexc_infostrZnamelistlenZwarningAttributeErrorIOErrorZ	exception)fpath
fasta_file_ZextZzfileexc_type	exc_valuenames r%   5lib/python3.8/site-packages/quast_libs/fastaparser.py_get_fasta_file_handler!   s>    
r'   c                 C   s&   t jd dkr"tt|  S | S )Nr   r   )r   version_infoioTextIOWrapperBytesIOread)Zcompressed_filer%   r%   r&   r   M   s    r   c                 C   s2   z| dd   d W S  tk
r,   Y dS X dS )zk
        Extracts name from fasta entry line:
        ">chr1  length=100500; coverage=15;" ---> "chr1"
    r   Nr    )split
IndexError)liner%   r%   r&   __get_entry_nameS   s    r1   c                 C   s   t  }d}d}t| }|D ]j}|ddkr8|d}n|g}|D ]@}|sLqB|d dkrr|rh|||< d}t|}qB|t| 7 }qBq|||< |  |S )ze
        Takes filename of FASTA-file
        Returns list of lengths of sequences in FASTA-file
    r   N>)r   r'   findr.   r1   r   stripclose)r   Zchr_lengthslchr_namer    raw_linelinesr0   r%   r%   r&   get_chr_lengths_from_fastafile^   s(    
r<   Fc           	      C   sz   d}i }i }t | D ]Z\}}| d }t|}||7 }tdd t|D ||< |rf|t|| 8 }|||< q|||fS )Nr   c                 s   s"   | ]\}}|d kr|d V  qdS )Nr   Nr%   ).0xsr%   r%   r&   	<genexpr>   s      z#get_genome_stats.<locals>.<genexpr>)
read_fastar.   r   set	enumerate)	fasta_fpathZskip_nsZgenome_sizeZreference_chromosomesZns_by_chromosomesnameseqr9   Zchr_lenr%   r%   r&   get_genome_stats|   s    
rH   c                 C   sH  d}d}d}d }| d }g }t | }|D ]}|ddkrH|d}	n|g}	|	D ]}
|
s\qR|
d dkr|r||||t| t|g ||7 }d}d}t|
}|t|
7 }qR|s|
}|t|
 7 }|t|
7 }qRq*W 5 Q R X ||||t| t|g t |d0}|D ]$}|ddd	 |D d
  qW 5 Q R X d S )Nr   z.fair2   r3   r4   w	c                 S   s   g | ]}t |qS r%   )r   )r>   Zfsr%   r%   r&   
<listcomp>   s     z#create_fai_file.<locals>.<listcomp>
)	r   r5   r.   appendr   r6   r1   writejoin)rE   r8   Ztotal_offsetZ
chr_offsetr9   Z	fai_fpathZ
fai_fieldsZin_fr:   r;   r0   Zchr_lineZout_fZfieldsr%   r%   r&   create_fai_file   s<    
  rP   c                 C   sz   t j|st | d}t| D ]F}|d dkrZ|r>|  tt j|t|d d}|r"|| q"|rv|  dS )z
        Takes filename of FASTA-file and directory to output
        Creates separate FASTA-files for each sequence in FASTA-file
        Returns nothing
        Oops, similar to: pyfasta split --header "%(seqid)s.fasta" original.fasta
    Nr   r4   z.farI   )	r   r   isdirmkdirr   r7   rO   r1   rN   )r   Zoutput_dirpathZoutFiler0   r%   r%   r&   split_fasta   s    
rS   c                 c   s   d}g }d}t | }|D ]^}|d}|D ]J}|s4q*|d dkrf|sT|d|fV  d}t|}g }q*||  q*q|s|r|d|fV  |  dS )zH
        Generator that returns FASTA entries in tuples (name, seq)
    Tr-   r2   r   r4   FN)r'   r.   rO   r1   rM   r6   r7   )r   firstrG   rF   r    r:   r;   r0   r%   r%   r&   rB      s&    
rB   c                 C   s(   g }t | D ]\}}|||f q|S )zB
        Returns list of FASTA entries (in tuples: name, seq)
    )rB   rM   )r   list_seqrF   rG   r%   r%   r&   read_fasta_one_time   s    rV   c                 C   s`   t | }g }|D ]8}|d}|D ]$}|s,q"|d dkr"||  q"q|  d|}|S )z
        Returns string
    r2   r   r4   r-   )r'   r.   rM   r6   r7   rO   )r   r    rU   r:   r;   r0   Z	fasta_strr%   r%   r&   read_fasta_str   s    

rW   c                 C   sH   | D ]>\}}t d|  tdt|dD ]}t |||d   q(qd S )Nz>%sr   <   )printranger   )fastarF   rG   ir%   r%   r&   print_fasta   s    r]   rI   c                 C   sb   t | |}|D ]F\}}|d|  tdt|dD ]}||||d  d  q4q|  d S )Nz>%s
r   rX   rL   )r   rN   rZ   r   r7   )r   r[   r
   ZoutfilerF   rG   r\   r%   r%   r&   write_fasta  s    
r^   c                 C   s   dddddd|    S )NTAGCr=   )r`   r_   rb   ra   r=   )upper)Zletterr%   r%   r&   comp  s    rd   c                    s*   t tdd d fddt| D S )NZ
ATCGNatcgnZ
TAGCNtagcnr-   c                 3   s   | ]}  |d V  qdS )r-   N)get)r>   Z
nucleotidecr%   r&   rA     s     zrev_comp.<locals>.<genexpr>)dictziprO   reversed)rG   r%   rf   r&   rev_comp  s    rk   )F)rI   )"Z
__future__r   r   r   r   r   collectionsr   ImportErrorZ%quast_libs.site_packages.ordered_dictr   Zquast_libs.site_packagesr(   r)   Z
quast_libsr   Zquast_libs.logr   ZLOGGER_DEFAULT_NAMEr   r'   r   r1   r<   rH   rP   rS   rB   rV   rW   r]   r^   rd   rk   r%   r%   r%   r&   <module>   s>   
,
#


