U
    L_q&                     @   s   d Z ddlZddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddl	ZddlmZ ddlmZ dd	lmZmZ eeZG d
d deZG dd deZdd Zdd Zedkre  dS )z,
Created on May 31, 2015

@author: namphuon
    N)
get_logger)MutableAlignment)ExhaustiveAlgorithm)optionsvalid_decomp_strategy)lcm)SeppProblem)JobPoolJoinc                   @   s0   e Zd ZdZdd Zdd Zdd Zdd	 Zd
S )EnsembleJoinSearchJobsz
    After all search jobs have finished on tips, we need return the
    distribution of the bitscores for the search.  This join accomplishes this
    c                 C   s   t |  d S N)r
   __init__self r   ,lib/python3.8/site-packages/sepp/ensemble.pyr      s    zEnsembleJoinSearchJobs.__init__c                 C   s(   || _ | D ]}| |jd  qd S )N	hmmsearch)root_problemiter_leavesZadd_jobjobs)r   r   pr   r   r   setup_with_root_problem   s    z.EnsembleJoinSearchJobs.setup_with_root_problemc                 C   s   t d dS )zP
        A dummy join that waits for all the search results to complete
         N)printr   r   r   r   perform!   s    zEnsembleJoinSearchJobs.performc                 C   s
   d| j fS )Nz!join search jobs for all tips of )r   r   r   r   r   __str__'   s    zEnsembleJoinSearchJobs.__str__N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r      s
   r   c                   @   sH   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dS )EnsembleExhaustiveAlgorithmz
    This implements the exhaustive algorithm where all alignments subsets
    are searched for every fragment. This is for UPP, meaning that no placement
    is performed, and that there is always only one placement subset
    (currently).
    c                 C   s    t |  d| _d | _d| _d S )NFT)r   r   ZsymfracZelimfiltersr   r   r   r   r   2   s    
z$EnsembleExhaustiveAlgorithm.__init__c                 C   sV   dt  _t  jd ks t  jd kr2td td t }|t	| j jj
 t| S )NZA_dummy_valuez;Specify the backbone alignment and tree and query sequences)r   	info_file	tree_filealignment_file_LOGerrorexitr   Zread_file_objectopennamer   check_options)r   	sequencesr   r   r   r+   8   s    z)EnsembleExhaustiveAlgorithm.check_optionsc                 C   s8   | j jd ks | j j|ks tdt| | || j _d S )Nzfcurrently eHMMs works with only one placement subset. Please leave placement subset size option blank.)r   placement_sizeAssertionErrorr   check_and_set_sizes)r   Ztotalr   r   r   r/   C   s    
z/EnsembleExhaustiveAlgorithm.check_and_set_sizesc           	      C   s   d| j jkrdS tdd | j j D }| j  D ]j}| }t|tsNt	|jdkrh| j j
g |_|d}| D ]"\}}|| |d |d g qzq4g }| D ]\}}t|dkr|| qd| j jd< tdt|  || _dS )	z merges search resultszfragments.distribution.doneNc                 S   s   g | ]}|g fqS r   r   ).0r*   r   r   r   
<listcomp>O   s   z=EnsembleExhaustiveAlgorithm.merge_results.<locals>.<listcomp>r      r   z.Fragments %s are not scored against any subset)r   ZannotationsdictZ	fragmentskeysr   Z
get_parent
isinstancer   r.   Zget_soft_sub_alignmentZget_job_result_by_nameitemsappendlenr&   Zwarningstrresults)	r   Zsequence_scoresZfragment_chunk_problemZalign_problemZ
search_reskeyvalZ	notScoredvr   r   r   merge_resultsK   s2    

z)EnsembleExhaustiveAlgorithm.merge_resultsc              	      s   dd  t | jtst| j D ]V}|jD ]J}t |ts>t|jd }| D ]$}|jd }||f fdd	 qPq,q"t }|	| j dS )z< a callback function called after hmmbuild jobs are finishedc                 S   s   | |_ t | d S r   )Zhmmmodelr	   Zenqueue_job)resultZ
search_jobr   r   r   enq_job_searchfragmentu   s    zHEnsembleExhaustiveAlgorithm.connect_jobs.<locals>.enq_job_searchfragmentZhmmbuildr   c                    s
    | |S r   r   )r?   Znext_jobr@   r   r   <lambda>   s    z:EnsembleExhaustiveAlgorithm.connect_jobs.<locals>.<lambda>N)
r5   r   r   r.   Zget_childrenZchildrenr   Zadd_call_Backr   r   )r   Zplacement_problemZalg_problemZbjZ
fc_problemZsjZjsjr   rA   r   connect_jobss   s    



z(EnsembleExhaustiveAlgorithm.connect_jobsc           	      C   s   | j }td | d}| d}t|d}t|d}|d | D ]P\}}t|dkrn|d|  qJ|D ]&}|d||d d	|d
 f  qrqJ|	  |	  d S )Nz"Generating csv of search results. z
scores.csvzunmatched.csvwzseq,bitscore,evalue
r   z%s z%s,%0.4f,%s
z{:.3e}r2   )
r:   r&   infoZget_output_filenamer)   writer6   r8   formatclose)	r   Zsearch_resultsZoutfilenameZnot_matchedfZ	unmatchedr;   valueZpairr   r   r   output_results   s$    





  z*EnsembleExhaustiveAlgorithm.output_resultsc                 C   s.   t t| j }t|| jj| }| |S r   )r8   listr   r   r   r   ZcpuZread_and_divide_fragments)r   Zalg_subset_countZfrag_chunk_countr   r   r   create_fragment_files   s     z1EnsembleExhaustiveAlgorithm.create_fragment_filesN)r   r   r   r   r   r+   r/   r>   rC   rK   rM   r   r   r   r   r    +   s   (r    c               	   C   sb  t jtjd t j } d| _| jd }d	ddg|j
d< |jdd	td
dddd |jddtddddd | jd }|jddtdddd dd |jddddtdd d! |jd"d#d$d%tdd&d! |jd'd(d)d*tdd+d! | d, d- | d. d/}|jd0d1td2dd d3d |jd4d5d6d7tdd8d! |jd9d:d;d<tdd=d! d S )>Nz~/.sepp/upp.configzThis script runs the UPP algorithm on set of sequences.  A backbone alignment and tree can be given as input.  If none is provided, a backbone will be automatically generated.decompGroup zThese optionszadetermine the alignment decomposition size, backbone size, and how to decompose the backbone set.descriptionz-Az--alignmentSizeZalignment_sizeN
   z,max alignment subset size of N [default: 10])typedestmetavardefaulthelpz-Sz--decomp_strategyZdecomp_strategyZDECOMPZhierarchicalzAdecomposition strategy [default: ensemble of HMMs (hierarchical)]
inputGroupz-sz--sequence_filerZsequence_fileZSEQzUnaligned sequence file.  If no backbone tree and alignment is given, the sequence file will be randomly split into a backbone set (size set to B) and query set (remaining sequences), [default: None]z-cz--configZconfig_fileZCONFIGzA config file, including options used to run UPP. Options provided as command line arguments overwrite config file values for those options. [default: %(default)s])rT   rU   rS   rW   z-tz--treer$   ZTREEz6Input tree file (newick format) [default: %(default)s]z-az--alignmentr%   ZALIGNz)Aligned fasta file [default: %(default)s]zUPP Optionsz*These options set settings specific to UPPzSEPP OptionszEThese options set settings specific to SEPP and are not used for UPP.z-Pz--placementSizer-   zJmax placement subset size of N [default: 10%% of the total number of taxa]z-rz--raxmlr#   ZRAXMLzURAxML_info file including model parameters, generated by RAxML.[default: %(default)s]z-fz
--fragmentZfragment_fileZFRAGz$fragment file [default: %(default)s])seppZconfigZset_main_config_pathospath
expanduserZ
get_parserrP   groupsjoin__dict__add_argumentintr   argparseZFileTypeZadd_argument_groupupper)parserrN   rX   Z	seppGroupr   r   r   augment_parser   s    

      
                rf   c                   C   s   t   t   d S r   )rf   r    runr   r   r   r   main   s    rh   __main__)r   rc   r[   rZ   r   Zsepp.alignmentr   Zsepp.exhaustiver   Zsepp.configr   r   Zsepp.math_utilsr   Zsepp.problemr   Zsepp.schedulerr	   r
   r   r&   r   r    rf   rh   r   r   r   r   <module>   s"   xS