U
    ad /                     @   s   d dl mZ d dl mZ d dlZd dlZd dlmZ d dlmZ e	  d dlm
Z
mZmZmZ d dlmZ d dlmZmZmZmZ d d	lmZ eejZG d
d dZdd Zdd Zdd ZdddZdd Zdd ZdS )    )with_statement)divisionN)defaultdict)qconfig)contigs_analyzerfastaparser	reportingplotter_data)qutils)correct_seqcorrect_nameget_uniq_namerun_parallel)
get_loggerc                   @   s   e Zd Zdd ZdS )Assemblyc                 C   s,   || _ || _tjtj| j d | _d S )Nr   )fpathlabelospathsplitextbasenamename)selfr   r    r   3lib/python3.8/site-packages/quast_libs/metautils.py__init__   s    zAssembly.__init__N)__name__
__module____qualname__r   r   r   r   r   r      s   r   c              	      s  t | j}t | j}td|  g }|d }tj||}i  t	 }	i }
t
| j}|| }tj|rt|}|D  ]}| }|d tj krtj|d  }|dd  }tj||d | d }||
krg |
|< |D ]Z\}}| kr| |< ||kr||
| kr|	| |
| | t
|||fgd qt||}|j|kr||kr|| | ||j qW 5 Q R X tjrt| t	  }||	 }t
| fdd	|D  t|| j}||fS )
Nz  processing z_not_aligned_anywhere.fastar      _to_.fastaac                    s   g | ]}| | fqS r   r   ).0r   contigsr   r   
<listcomp>L   s     z.parallel_partition_contigs.<locals>.<listcomp>)r
   label_from_fpathr   label_from_fpath_for_fnameloggerinfor   r   joinsetr   Zread_fasta_one_timeexistsopensplitr   ref_labels_by_chromosomeskeysaddappendwrite_fastar   r   r   Zspace_efficientremover   )asmassemblies_by_refcorrected_dirpathalignments_fpath_templateassembly_labelcorr_assembly_labelZadded_ref_asmZnot_aligned_fnameZnot_aligned_fpathZaligned_contig_namesZaligned_contigs_for_each_refZcontigs_seqZalignments_fpathflinevaluesref_nameZref_contigs_namesref_contigs_fpathZ	cont_nameseqZref_asmZall_contigs_namesZnot_aligned_contigs_namesZnot_aligned_asmr   r$   r   parallel_partition_contigs   sV    
 




rB   c                    s   t dd |D ttjt| } fdd| D }tt||\}g |D ]v}t|t	dd fddt
tD D }	g }
|D ]&}|	D ]}|j|kr|
|  qqq||
f qP|fS )Nc                 S   s   g | ]}t |g fqS r   )r
   name_from_fpathr#   	ref_fpathr   r   r   r&   T   s     z%partition_contigs.<locals>.<listcomp>c                    s   g | ]}| fqS r   r   r#   r6   )r9   r7   r8   r   r   r&   V   s   c                 S   s   g | ]}|D ]}|qqS r   r   )r#   Zsublistvalr   r   r   r&   \   s       c                 3   s   | ]} |  V  qd S Nr   )r#   i)assemblies_dictsr?   r   r   	<genexpr>\   s     z$partition_contigs.<locals>.<genexpr>)dictminr   Zmax_threadslenr   rB   r
   rC   r,   ranger   r3   )
assemblies
ref_fpathsr8   r9   labelsZn_jobsZparallel_run_argsZnot_aligned_assembliesrE   Znot_sorted_assembliesZsorted_assembliesr   assemblyr   )r9   r7   rJ   r8   r?   r   partition_contigsR   s$    
*

rT   c                 C   s   t j|tj}tj}dt_tj| ||d d\}}|t_dd |D }dd |D }tjs`tj	rdd |D }	dd |D }
t
j|	|
d ||fS )	Nr   )r   c                 S   s   g | ]}t |t|qS r   )r   r
   r'   )r#   r   r   r   r   r&   n   s     z&correct_assemblies.<locals>.<listcomp>c                 S   s   g | ]
}|j qS r   r   rF   r   r   r   r&   o   s     c                 S   s   g | ]
}|j qS r   )r   rF   r   r   r   r&   r   s     c                 S   s   g | ]
}|j qS r   rU   rF   r   r   r   r&   s   s     )rR   )r   r   r+   r   Zcorrected_dirname
min_contigr
   Zcorrect_contigsZ
draw_plotsZhtml_reportr	   Zsave_colors_and_ls)Zcontigs_fpathsZoutput_dirpathrR   r8   rV   Zcorrected_contigs_fpathsZold_contigs_fpathsrP   Zcorrected_labelsZcorr_fpathsZcorr_labelsr   r   r   correct_assembliesg   s    rW   Fc              	      s  g t jtj}i   fdd}dd | D }g }|D ]}t|\}}	|| q>g }
t| }t	| |D ]P\}}d}t j
|}t|\}}	g  |< tt}d }tt|D ]f\}\}}|d7 }t|tjt| d }t||}||  d7  < |||||	||\}}|s q&q|rbtd| d t| d	  t|t|d
 qt|rtd| d   | D ]\}}tj|= q |=   |
| qttjd| d dd qt|
D ]}| | qt dkrtdtj  n
td | | fS )Nc           
         s   |}||7 }|dkrd }nt tj|}| t |d |  }tjsft	||}	|	sfdS t
|||fgd t |tj|<  | |t|f ||fS )Nr   _)NNr"   )r
   Zunique_corrected_fpathr   r   r+   r3   rC   r   Zno_checkr   r   r4   r   r0   rN   )
seq_namerA   r?   ref_fasta_exttotal_referencesrE   Z	seq_fnamecorr_seq_fpathcorr_seq_nameZcorr_seqchromosomes_by_refsr8   Zcorrected_ref_fpathsr   r   _proceed_seq   s    


z-correct_meta_references.<locals>._proceed_seqc                 S   s   g | ]}t j|qS r   )r   r   r   rD   r   r   r   r&      s     z+correct_meta_references.<locals>.<listcomp>r   r   z  z ==>  r"   z	Skipping z` because it is empty or contains incorrect sequences (header-only or with non-ACGTN characters)!zReference file zU is empty or contains incorrect sequences (header-only or with non-ACGTN characters)!)Zexit_with_codez"  All references were combined in zAll references were skipped!)r   r   r+   r   Zcombined_ref_namer
   splitext_for_fasta_filer3   Zprocess_labelszipr   r   int	enumerater   Z
read_fastar   ZMAX_CONTIG_NAMErN   r   r)   Z	main_inforC   r4   Zwarningr   r0   poperrorr5   )rQ   r8   Zdownloaded_refsZcombined_ref_fpathra   Z
ref_fnamesZ	ref_names	ref_fnamer?   r[   Zexcluded_ref_fpathsrE   r\   rY   Zused_seq_namesr]   rI   rZ   rA   Zuniq_seq_namer^   Zexcludedr   r_   r   correct_meta_referencesx   s^    

 

rj   c              	   C   s  i }t | dd}|  |D ]P}|dks,|s0 qntd|}|rt|dkr| }|d |d f||d < qW 5 Q R X g }|D ]}tj|}	t	
|	\}
}d}d}||
 D ]@}|d |kr|t||d  d 7 }|t||d  d 7 }q|sq||tj kr|| q|S )Nr
zlength: (\d+)   r   r   )r.   readlinerefindallrN   r/   r   r   r   r
   rc   re   r   Zdownloaded_ref_min_aligned_rater3   )Zgenome_info_fpathrQ   r`   Zrefs_lenZreport_filer=   ZlengthsZ	corr_refsrE   ri   refr[   aligned_lenZall_lenZ
chromosomer   r   r   #get_downloaded_refs_with_alignments   s2    $rs   c              
   C   s(  t j| tjtj}|D ]}t }t|j	}t
|j	}t|| B}|D ]6}| d\}	}
}||	g t|
t|f qNW 5 Q R X | D ]\}	}tdd |D tdd |D  }t
|j	}t jt j|j	|d |	 d }|tj|< tj||	d}|tjjd|  qqd S )	N	c                 s   s   | ]\}}|| V  qd S rH   r   r#   rr   
contig_covr   r   r   rK      s     z-calculate_ave_read_support.<locals>.<genexpr>c                 s   s   | ]\}}|V  qd S rH   r   ru   r   r   r   rK      s     r    r!   )r?   z%.2f)r   r   r+   r   Z detailed_contigs_reports_dirnameZunique_contigs_fname_patternrL   r
   r'   r   r(   r.   stripr/   
setdefaultr3   floatitemssumdirnameZassembly_labels_by_fpathr   getZ	add_fieldZFieldsZAVE_READ_SUPPORT)Zcombined_output_dirpathrP   Zunique_contigs_fpathrS   Zaligned_contigs_by_refr:   r;   Zin_fr=   r?   Z
contig_lenrv   r%   Zref_covr@   Zreportr   r   r   calculate_ave_read_support   s.    
* 
r~   )F)Z
__future__r   r   r   ro   collectionsr   Z
quast_libsr   Zcheck_python_versionr   r   r   r	   r
   Zquast_libs.qutilsr   r   r   r   Zquast_libs.logr   ZLOGGER_META_NAMEr)   r   rB   rT   rW   rj   rs   r~   r   r   r   r   <module>   s$   
3
Q