U
    ÔL_Ð  ã                   @   st   d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	 e	e
ƒZG dd„ deƒZdd	„ Zd
d„ Zdd„ Zdd„ ZdS )zz
Collection of functions for metagenomic pipeline for taxonomic classification
Created on June 3, 2014

@author: namphuon
é    )ÚJoinÚJobPool)ÚSeppProblem)ÚHMMAlignJob)Ú
get_loggerc                   @   s8   e Zd ZdZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ ZdS )ÚJoinBlastJobsz¥
    After all blast search jobs have finished on markers, we need to figure out
    which fragment goes to which marker.
    This join takes care of that step.
    c                 C   s   t  | ¡ d S )N)r   Ú__init__©Úself© r   ú+lib/python3.8/site-packages/sepp/scratch.pyr      s    zJoinBlastJobs.__init__c                 C   s(   || _ | ¡ D ]}|  |jd ¡ qd S )NZblastsearch)Úroot_problemÚiter_leavesZadd_jobÚjobs)r
   r   Úpr   r   r   Úsetup_with_root_problem   s    z%JoinBlastJobs.setup_with_root_problemc           
      C   s  d| j jkrdS tdd„ | j j ¡ D ƒƒ}| j  ¡ D ]„}| ¡ }t|tƒsNt	‚|jdkrh| j j 
g ¡|_| d¡}| ¡ D ]<}|| \}}|dks¢||| d k rz|| d |f||< qzq4g }| ¡ D ]2\}}	|	d dkræ| |¡ qÆ|	d jj |¡ qÆd| j jd< t dt|ƒ ¡ dS )z4 Figure out which fragment should go to which markerzfragments.distribution.doneNc                 S   s   g | ]}|d f‘qS ))NNr   )Ú.0Únamer   r   r   Ú
<listcomp>!   s   ÿz;JoinBlastJobs.figureout_fragment_marker.<locals>.<listcomp>Z	hmmsearché   z.Fragments %s are not scored against any subset)r   ZannotationsÚdictÚ	fragmentsÚkeysr   Z
get_parentÚ
isinstancer   ÚAssertionErrorZget_soft_sub_alignmentÚget_job_result_by_nameÚitemsÚappendZ	seq_namesÚaddÚ_LOGZwarningÚstr)
r
   Zmax_evaluesÚfragment_chunk_problemZalign_problemZ
search_resÚkeyZ
best_valueZprev_align_problemZ	notScoredÚvr   r   r   Úfigureout_fragment_marker   s6    
ÿ
ÿÿ
ÿz'JoinBlastJobs.figureout_fragment_markerc                 C   sÊ   |   ¡  dd„ | jjD ƒ}|D ]¦}t|tƒs0t‚t| ¡ ƒ}|j 	|¡}t
|jƒD ]p\}}|| |_|jd }t|tƒs|t‚| d¡|_|jd j|_|j ¡ s°|j |j¡ nd|_tƒ  |¡ qRqdS )zº
        Distributes fragments to alignments subsets with best score,
        and runs align jobs on those. Also, creates new chunks of fragments
        for better parallelism.
        c                 S   s   g | ]}|j D ]}|‘qqS r   )Úchildren)r   r   Zalgr   r   r   r   W   s     ÿz)JoinBlastJobs.perform.<locals>.<listcomp>ZhmmalignZhmmbuildTN)Zfigureout_fragment_subsetr   r%   r   r   r   ÚlenZget_childrenr   Zdivide_to_equal_chunksÚ	enumerater   r   r   ZhmmmodelZinfileZbase_alignmentZis_emptyZwrite_to_pathZfake_runr   Zenqueue_job)r
   Zalg_problemsZalg_problemZchunksZfragment_chunksÚir!   Zajr   r   r   ÚperformJ   s(    ÿ


ÿzJoinBlastJobs.performc                 C   s
   d| j fS )Nz!join search jobs for all tips of )r   r	   r   r   r   Ú__str__p   s    zJoinBlastJobs.__str__N)	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r$   r)   r*   r   r   r   r   r      s   -&r   c                 C   s   dS )z:Blast the fragments against all marker genes+16S sequencesNr   ©ZconfigÚinputÚoutputr   r   r   Úblast_fragmentst   s    r2   c                 C   s   dS )z€Fixes the direction of all the reads by searching the
       sequences against each HMM and finding out which one works the bestNr   r/   r   r   r   Úfix_directionx   s    r3   c                 C   s   dS )zGReverse a sequence to be in the same direction as marker sequences
    Nr   )Zsequencer   r   r   Úreverse_sequence}   s    r4   c                 C   s   dS )z Reads the results for blast
    Nr   )r0   r   r   r   Úread_blast_results‚   s    r5   N)r.   Zsepp.schedulerr   r   Zsepp.problemr   Z	sepp.jobsr   Zseppr   r+   r   r   r2   r3   r4   r5   r   r   r   r   Ú<module>   s   e