U
    L_2V                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZmZmZmZ dd	lmZmZ dd
lmZ ddlmZ eeZdd ZG dd deZG dd deZ G dd deZ!edkre! "  dS )z,
Created on Oct 10, 2012

@author: smirarab
    )AbstractAlgorithm)options)PhylogeneticTree)MutableAlignmentExtendedAlignmenthamming_distance)SeppProblem)Tree)HMMBuildJobHMMSearchJobHMMAlignJob
PplacerJobMergeJsonJob)JobPoolJoin)
get_logger)lcmc                 C   s   d|  S )Nz
pplacer_%d )Zchunk_numberr   r   .lib/python3.8/site-packages/sepp/exhaustive.pyget_placement_job_name   s    r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )JoinSearchJobsz
    After all search jobs have finished on tips, we need to figure out which
    fragment goes  to which subset and start aligning fragments.
    This join takes care of that step.
    c                 C   s   t |  d S Nr   __init__selfr   r   r   r      s    zJoinSearchJobs.__init__c                 C   s(   || _ | D ]}| |jd  qd S )N	hmmsearch)root_problemiter_leavesadd_jobjobs)r   r   pr   r   r   setup_with_root_problem"   s    z&JoinSearchJobs.setup_with_root_problemc                 C   s  d| j jkrdS tdd | j j D }| j  D ]}| }t|tsNt	|jdkrh| j j
g |_|d}| D ]8\}}|| \}}|dks||d k rz|d |f||< qzq4g }	| D ]2\}}
|
d dkr|	| q|
d jj| qd| j jd< tdt|	  dS )z8 Figure out which fragment should go to which subproblemzfragments.distribution.doneNc                 S   s   g | ]}|d fqS ))NNr   ).0namer   r   r   
<listcomp>,   s     z<JoinSearchJobs.figureout_fragment_subset.<locals>.<listcomp>r      z.Fragments %s are not scored against any subset)r   Zannotationsdict	fragmentskeysr   Z
get_parent
isinstancer   AssertionErrorget_soft_sub_alignmentget_job_result_by_nameitemsappend	seq_namesadd_LOGZwarningstr)r   Zmax_evaluesfragment_chunk_problemZalign_problemZ
search_reskeyvalZ
best_valueZprev_align_problemZ	notScoredvr   r   r   figureout_fragment_subset'   s:    


z(JoinSearchJobs.figureout_fragment_subsetc                 C   s   |    dd | jjD }|D ]}t|ts0tt| }|j	|}t
|jD ]z\}}|| |_|jd }t|ts|t|d|_|jd j|_|jdks|j rd|_n|j|j t | qRqdS )z
        Distributes fragments to alignments subsets with best score,
        and runs align jobs on those. Also, creates new chunks of fragments
        for better parallelism.
        c                 S   s   g | ]}|j D ]}|qqS r   )children)r#   r!   Zalgr   r   r   r%   b   s     z*JoinSearchJobs.perform.<locals>.<listcomp>hmmalignhmmbuildNT)r8   r   r9   r*   r   r+   lenget_childrenr(   Zdivide_to_equal_chunks	enumerater    r   r-   hmmmodelinfileZbase_alignmentis_emptyfake_runwrite_to_pathr   enqueue_job)r   Zalg_problemsalg_problemZchunksfragment_chunksir4   ajr   r   r   performU   s.    

zJoinSearchJobs.performc                 C   s
   d| j fS )Nz!join search jobs for all tips of )r   r   r   r   r   __str__~   s    zJoinSearchJobs.__str__N)	__name__
__module____qualname____doc__r   r"   r8   rI   rJ   r   r   r   r   r      s   .)r   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )JoinAlignJobsz
    After all alignments jobs for a placement subset have finished,
    we need to build those extended alignments and start placing fragments.
    This join takes care of that step.
    c                 C   s   t |  d S r   r   r   r   r   r   r      s    zJoinAlignJobs.__init__c                 C   s0   || _ |j| _| D ]}| |jd  qd S )Nr:   )placement_problemparentr   r   r   r    )r   rP   r!   r   r   r   setup_with_placement_problem   s    z*JoinAlignJobs.setup_with_placement_problemc           	      C   s2  | j }td|j  |jjg |_| D ]}|j jt	|jO  _q.t
 }|jD ]*}t|tshtdd |jD }|||< qVg }td| jjD ]}t|jj}|jD ]f}|| | r||jd j|| | g}n||jd jg }td||jf  |j|dd q|  |  || q|S )	z
        Merge alignment subset extended alignments to get one extended
        alignment for current placement subset.
        z2Merging sub-alignments for placement problem : %s.c                 S   s   g | ]}| d qS )r:   )r-   )r#   fpr   r   r   r%      s   z5JoinAlignJobs.merge_subalignments.<locals>.<listcomp>r   r;   z@Merging alignment subset into placement subset for chunk %d: %s.FZconvert_to_string)rP   r2   infolabelrQ   r(   r,   r=   r0   setr'   r9   r*   r   r+   ranger   rF   r   Z,read_extendend_alignment_and_relabel_columnsr    r@   debugmerge_inZremove_missing_fragmentsZfrom_bytearray_to_stringr/   )	r   ppapZfragfilesperapZaligned_filesZextendedAlignmentsrG   ZextendedAlignmentZap_algr   r   r   merge_subalignments   sJ    



 
 z!JoinAlignJobs.merge_subalignmentsc                 C   s   | j }|  }td| jjD ]v}|| }| }| }|jt| }t	|t
sTt| rbd|_||j ||j |d| t | qd S )Nr   Tfull_extended_alignment_object)rP   r]   rX   r   rF   Z get_fragments_readonly_alignmentZget_base_readonly_alignmentr    r   r*   r   r+   rA   rB   rC   Zextended_alignment_fileZbackbone_alignment_fileZset_attributer   rD   )r   r[   ZfullExtendedAlignmentsrG   fullExtendedAlignmentZqueryExtendedAlignmentZbaseAlignmentpjr   r   r   rI      s$     zJoinAlignJobs.performc                 C   s
   d| j fS )Nzjoin align jobs for tips of )rP   r   r   r   r   rJ      s    zJoinAlignJobs.__str__N)	rK   rL   rM   rN   r   rR   r]   rI   rJ   r   r   r   r   rO      s   0rO   c                   @   st   e Zd ZdZdd Zdd Zdd Zdd	 Zg fd
dZdd Z	dd Z
dd Zdd Zdd Zdd Zdd ZdS )ExhaustiveAlgorithmzt
    This implements the exhaustive algorithm where all alignments subsets
    are searched for every fragment.
    c                 C   s^   t |  d| _d| _d| _d| _t jj| _t j	| _	t
t jj| _| jj| _t | _d S )NFiT)r   r   Zplace_nomatch_fragmentselimfilterssymfracr   
exhaustivestrategydecomp_strategyintminsubsetsizemoleculer'   	distancesr   r   r   r   r      s    


zExhaustiveAlgorithm.__init__c                 C   sz   |  D ]l\}}|  D ]Z\}}d||g| jkrt||| jd||g< | jd||g | jd||g< qqd S )N )r.   joinrk   r   )r   	sequencesZseq1Zval1Zseq2Zval2r   r   r   compute_distances   s    z%ExhaustiveAlgorithm.compute_distancesc              
   C   s8  t | jtstt| jj }| j D ]:}td| jj	D ]&}|j
t| d}|j|dd q<q*|| _g }|d| jjjdd  | j D ]`}t |tsttd| jj	D ]>}|t|d krq|d|jjdd|t|f  qq|d |d d	|}t }||| d
 |  d S )Nr   r^   TrT   z%s;)labelsz%s;
%srl   
zplacement.json)r*   r   r   r+   r   r(   r)   r=   rX   rF   r    r   Zget_attributerZ   resultsr/   subtreeZcompose_newickr-   rm   r   Zsetupget_output_filenamerun)r   r_   r[   rG   Zextended_alignmentZ
mergeinputZmeregeinputstringZmergeJsonJobr   r   r   merge_results   sV    
 



z!ExhaustiveAlgorithm.merge_resultsc              	   C   st   |  d}| j| | j  |  d}| j| | jj }|rp|  d}t|d}|| W 5 Q R X dS )z Merged json file is already saved in merge_results function and
            full extended alignment already created in merge_results function
        zalignment.fastazalignment_masked.fastazrename-json.pywN)	rt   rr   rC   Zremove_insertion_columnsr   rs   Zrename_scriptopenwrite)r   ZoutfilenameZnamerev_scriptsr   r   r   output_results$  s    



z"ExhaustiveAlgorithm.output_resultsc                 C   s&   t  jd kr|dg }t| | d S )Nz
raxml file)r   	info_filer   check_options)r   Zsupplyr   r   r   r}   3  s    
z!ExhaustiveAlgorithm.check_optionsc                 C   s   d S r   r   )r   a_treer   r   r   modify_tree8  s    zExhaustiveAlgorithm.modify_treec                 C   s  |   \}}t jdkr"| | t|ts0tt|ts>t| 	  |
  | |  | || tt|jj| jj| j| jjt| jjj i d| j| jd d}t|dkstd| j| j| jjf tdt|  | D ]^\}}t|tstt| | j}||_ dt!| |_"t#d|j"t| f  tt|jj| jj$| j| ji | jjt j| j| jj%d}t|dkstd	| j| j| jj$f t#d
|j"t|t!t&|' f  t#d|j"t(dd |) D f  | D ]N\}}	t|	tst| *|	 t|	 |}
|	|
_ dt!|t!|f |
_"qqtdtt+| j,   | - }t|| j_.| j, D ]V}
t/d| jj.D ]@}t|
j0|
}|
j |_ |
j"1ddd t!| |_"|| |_2qqtd| jj.  t#dt!| j  | jS )Nr&   )rf   minSizetree_map	pdistancerg   rk   maxDiamr   ziTree could not be decomposed given the following settings; strategy:%s minsubsetsize:%s placement_size:%sz#Breaking into %d placement subsets.zP_%sz Placement subset %s has %d nodes)rf   r   r   rg   r   rk   r   zhTree could not be decomposed given the following settings; strategy:%s minsubsetsize:%s alignmet_size:%sz0Placement subset %s has %d alignment subsets: %sz Placement subset %s has %d taxa:c                 S   s   g | ]}t | qS r   )r<   leaf_node_names)r#   r~   r   r   r   r%     s   z9ExhaustiveAlgorithm.build_subproblems.<locals>.<listcomp>zA_%s_%sz#Breaking into %d alignment subsets.ZA_ZFC__z7Breaking each alignment subset into %d fragment chunks.zSubproblem structure: %s)3Zread_alignment_and_treer   Zdistancero   r*   r   r+   r   Zget_treeZresolve_polytomiesZlable_edgesZcheck_and_set_sizesZget_num_taxaZ_create_root_problemr	   Zden_treeZdecompose_treeZplacement_sizerf   rh   re   Zplacementminsubsetsizefacotrrg   rk   r<   ri   r2   rU   r.   r   r   r   rs   r3   rV   rY   Zalignment_sizer   sortedr)   sumvaluesr   listr   create_fragment_filesrF   rX   Ztaxareplacer(   )r   Z	alignmentZtreeZplacement_tree_mapZp_keyZp_treerP   Zalignment_tree_mapZa_keyr~   Zalignment_problemZfragment_chunk_filesZafcZfrag_chunk_problemr   r   r   build_subproblems;  s    
 
 


 z%ExhaustiveAlgorithm.build_subproblemsc                 C   s.   t t| j }t|| jj| }| |S r   )r<   r   r   r   r   r   ZcpuZread_and_divide_fragments)r   Zalg_subset_countZfrag_chunk_countr   r   r   r     s     z)ExhaustiveAlgorithm.create_fragment_filesc                 C   s   t  S r   )rO   r   r   r   r   _get_new_Join_Align_Job  s    z+ExhaustiveAlgorithm._get_new_Join_Align_Jobc           	   	   C   s  t | jtst| j D ]}td| jjD ],}t }||| j	j
| |t|| q,|jD ]}t |tsrtt }|j|f| j| jdt| j	j ||j| | D ]R}t }||j|| j| j ||j| t }||j| |j|| jd qq`qd S )Nr   )rd   rj   )rj   )r*   r   r   r+   r=   rX   rF   r   Zpartial_setup_for_subproblemr   r|   r   r   r9   r
   Zsetup_for_subproblemrd   rj   varsr;   Zjob_typer   r(   rb   rc   r   )	r   rP   rG   r`   rE   bj
fc_problemsjrH   r   r   r   
build_jobs  sJ      

   zExhaustiveAlgorithm.build_jobsc              	      s   dd  t | jtst| j D ]h}|jD ]J}t |ts>t|jd }| D ]$}|jd }||f fdd	 qPq,|  }|	| q"t
 }|| j dS )z< a callback function called after hmmbuild jobs are finishedc                 S   s   | |_ t | d S r   )r?   r   rD   )resultZ
search_jobr   r   r   enq_job_searchfragment  s    z@ExhaustiveAlgorithm.connect_jobs.<locals>.enq_job_searchfragmentr;   r   c                    s
    | |S r   r   )r   Znext_jobr   r   r   <lambda>  s    z2ExhaustiveAlgorithm.connect_jobs.<locals>.<lambda>N)r*   r   r   r+   r=   r9   r    Zadd_call_Backr   rR   r   r"   )r   rP   rE   r   r   r   ZjajZjsjr   r   r   connect_jobs  s    


z ExhaustiveAlgorithm.connect_jobsc                 C   s0   | j jD ]"}|jD ]}t |jd  qqd S )Nr;   )r   r9   r   rD   r    )r   r!   r\   r   r   r   enqueue_firstlevel_job  s    
z*ExhaustiveAlgorithm.enqueue_firstlevel_jobN)rK   rL   rM   rN   r   ro   rv   r{   r}   r   r   r   r   r   r   r   r   r   r   r   ra      s   	(f!ra   __main__N)#rN   Zsepp.algorithmr   Zsepp.configr   Z	sepp.treer   Zsepp.alignmentr   r   r   Zsepp.problemr   Zdendropy.datamodel.treemodelr	   Z	sepp.jobsr
   r   r   r   r   Zsepp.schedulerr   r   Zseppr   Zsepp.math_utilsr   rK   r2   r   r   rO   ra   ru   r   r   r   r   <module>   s&   i^  