U
    adk                     @   s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlmZm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZ ee	jZd
Zdd Zdd Zdd Z dd Z!dddZ"dd Z#dS )    )with_statementN)	reportingqconfigqutils)open_gzipsafe)
read_fastawrite_fastarev_comp)add_genes_to_fasta)Gene)
get_logger)compile_toolget_path_to_programrun_parallelFc              
   C   sR   t |d}|d | D ],}t|}|t|dd W 5 Q R X q|  |S )z<Merges all GFF files into a single one, dropping GFF header.wz##gff-version 3
   N)r   writeopen
writelines	itertoolsisliceclose)gffsZout_pathZout_filegff_pathgff_file r   1lib/python3.8/site-packages/quast_libs/glimmer.py
merge_gffs   s    


 r   c              
   c   s   t | }tjttdd |dd}|D ]V\	}}}}}}}	}
}|dkrHq(tdd |dD }||d	t|t||	fV  q(|	  d S )
Nc                 S   s   |  d S )N#)
startswith)lr   r   r   <lambda>*       zparse_gff.<locals>.<lambda>	)Z	delimiterZmRNAc                 s   s   | ]}| d V  qdS )=N)split).0Zkvr   r   r   	<genexpr>0   s     zparse_gff.<locals>.<genexpr>;Name)
r   csvreaderlistfilterdictr%   getintr   )r   r   rindexZ_sourcetypestartendZscorestrandZphaseZextraZattrsr   r   r   	parse_gff(   s     r7   c                    s   fdd}t j| ddi }	g }
tj|d}tt|D ]v\}\}}t|}|d tj	 }t j||d }t j||d }t
|||fg |||dkrB|
| ||	|< qB|
sd	S |d
 }t|
|}t d }}g t|D ]\}}}}}|d7 }|dkr"|	| |d | }nt|	| |d | }||krN|| t|||||d}|jdko||jt|	| k |_| qfdd|D }fdd|D }tr|d }t| tjst| |t||||fS )Nc              
      sT   t  d@}tj| ddd|g||dt d d}|W  5 Q R  S Q R X d S )Naz-dz-gz-o  )stdoutstderrindent)r   r   Zcall_subprocessindex_to_str)contig_pathZtmp_pathZerr_fileZreturn_code)err_pathr2   tool_exec_fpathtrained_dirr   r   run6   s    zglimmerHMM.<locals>.runZtrainedZarabidopsis)dirz.fastaz.gffr   )NNNNNNz
_genes.gff   +)contigr4   r5   r6   seqc                    s"   g | ] t  fd dD qS )c                    s"   g | ]}|j r|j|j  kqS r   is_fullr5   r4   r&   geneZ	thresholdr   r   
<listcomp>c   s      )glimmerHMM.<locals>.<listcomp>.<listcomp>sumr&   genesrL   r   rM   c   s     zglimmerHMM.<locals>.<listcomp>c                    s"   g | ] t  fd dD qS )c                    s"   g | ]}|j s|j|j  kqS r   rH   rJ   rL   r   r   rM   d   s      rN   rO   rQ   rR   rL   r   rM   d   s     z_genes.fasta)ospathjointempfileZmkdtemp	enumerater   strr   ZMAX_CONTIG_NAME_GLIMMERr   appendr   setr7   r	   addr   r4   r5   lenrI   OUTPUT_FASTAr
   r   debugshutilrmtree)Ztool_dirr@   Zfasta_fpath	out_fpathgene_lengthsr?   Ztmp_dirr2   rB   Zcontigsr   Zbase_dirZseq_numZindrG   r>   r   Zout_gff_fpathout_gff_pathuniquetotalrF   Zgene_idr4   r5   r6   Zgene_seqrK   full_cntpartial_cntZout_fasta_fpathr   )r?   rS   r2   r@   rA   r   
glimmerHMM5   sL    






 

ri   c              	   C   s   t |}t |}tdt |  |  tj||d }	tj||d }
t	||||	||
|| \}}}}}}|rtdt |  d t
| d t
| d  tdt |  d |  ||||fS )Nr9   Z_glimmerz_glimmer.stderrz
  Genes = z	 unique, z totalz  Predicted genes (GFF): )r   label_from_fpathZlabel_from_fpath_for_fnameloggerinfor=   rT   rU   rV   ri   rY   )r2   contigs_fpathrc   out_dirpathtool_dirpathr@   tmp_dirpathZassembly_labelZcorr_assembly_labelrb   Z	err_fpathrd   rS   re   rf   
full_genespartial_genesr   r   r   predict_geneso   s"    

     0rs   c                 C   sF   t jtjd}t j|d}td|s<td|dg| |d td|S )NglimmersrcZ
glimmerhmmZ
GlimmerHMMz../glimmerhmm)rk   
only_clean)rT   rU   rV   r   LIBS_LOCATIONr   r   )rk   rv   ro   Ztool_src_dirpathr   r   r   compile_glimmer   s
    
rx   c                    s  t   t d tjtjdtjdtt s@d S tj	sVt
 tj	slt
 tt| tj} fddt| D }tt||\}}}}t }	t| D ]\}
}t|}t|}||
 |	|< ||
 d k	r|tjj||
  ||
 d k	r@dd t||
 ||
 D }|tjj| ||
 d kr||
 d krt d| tjsndnd	  qtjst t d
 |	S )NzRunning GlimmerHMM...rt   Ztmpc              	      s"   g | ]\}}|| fqS r   r   )r&   r2   rm   rc   rn   rp   ro   r@   r   r   rM      s   zdo.<locals>.<listcomp>c                 S   s   g | ]\}}d ||f qS )z%s + %s partr   )r&   rg   rh   r   r   r   rM      s     zFailed running Glimmer for %s. z4Run with the --debug option to see the command line. zDone.)rk   Zprint_timestampZ	main_inforT   rU   rV   r   rw   rx   isdirmakedirsminr]   Zmax_threadsrX   r   rs   r.   r   r/   r   rj   Z	add_fieldZFieldsZPREDICTED_GENES_UNIQUEzipZPREDICTED_GENESerrorr_   r`   ra   )Zcontigs_fpathsrc   rn   Zn_jobsZparallel_argsZ
genes_listre   rq   rr   Zgenes_by_labelsirm   ZreportZlabelrS   r   ry   r   do   sL    






r   )F)$Z
__future__r   rT   rW   r   r*   r`   Z
quast_libsr   r   r   Zquast_libs.ca_utils.miscr   Zquast_libs.fastaparserr   r   r	   Zquast_libs.genemarkr
   Zquast_libs.genes_parserr   Zquast_libs.logr   Zquast_libs.qutilsr   r   r   ZLOGGER_DEFAULT_NAMErk   r^   r   r7   ri   rs   rx   r   r   r   r   r   <module>   s(   
:
	