U
    L_?                     @   s   d Z ddlmZmZmZ ddlmZ ddlmZ	 ddl
mZmZ ddlmZ ddlmZ zddlmZ W n  ek
r   ddlmZ Y nX dd	lZdd	lZdd	lZdd	lZeeZG d
d deZdd Zdd Zdd Zd	S )zMSATe - Phylogenetic Tree Container, effectively a wrapper of
   dendropy.Tree    )TreeTaxontreecalc)DataSet)_convert_node_to_root_polytomy)
get_loggersortByValue)get_pdistance)decompose_by_diameter)StringIONc                   @   s   e Zd ZdZd8ddZdd Zdd Zd	d
 Zdd Ze	eZ
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd9d$d%Zd&d' Zd(d) Zd*d+ Zd:d.d/Zd-i d0d1d-d-fd2d3Zd4d5 Zd6d7 Zd-S );PhylogeneticTreezBData structure to store phylogenetic tree, wrapping dendropy.Tree.Tc                 C   sT   || _ t| j tst|  | _d | j jj_d | j jj_	d | _
d | _|rP|   d S N)_tree
isinstancer   AssertionErrorcount_leavesn_leaves	seed_nodeedge	tail_nodelength_namemap
_revscriptmap_seq_names)selfdendropy_treeZmap_internal_node_names r   (lib/python3.8/site-packages/sepp/tree.py__init__/   s    
zPhylogeneticTree.__init__c              
   C   s   d}d dd tdD }t | _t }| j D ]P}|jr4d||tjdt	j
dd	 d|jd
f }|| j|< |j||< |d }q4dt||f | _d S )N    c                 s   s   | ]}t tjV  qd S r   )randomZchoicestringZascii_letters).0Zletterr   r   r   	<genexpr>=   s   z1PhylogeneticTree.map_seq_names.<locals>.<genexpr>   z	%sN%06d%sz[%s]_ )replr"   a  import ast, re, string, sys
revnamemap = ast.literal_eval("%s")
def relabel_newick(newick_string):
    pattern = re.compile("(%sN[^(,:)<>]+)")
    invalidChars = set(string.punctuation).union(set(string.whitespace))
    def replace_func(m):
        repl = m.group(1)
        if m.group(1) in revnamemap:
            repl = revnamemap[m.group(1)]
            if any(char in invalidChars for char in repl):
                repl = "'%%s'" %%repl
        else:
            repl = m.group(1)

        return repl
    t = pattern.sub(replace_func,newick_string)
    return t
for l2 in sys.stdin.readlines():
        sys.stdout.write(relabel_newick(l2)))joinrangedictr   r   Zinternal_nodeslabelresubr"   Zpunctuationreplacestrr   )r   itagZ
revnamemapnl1r   r   r   r   ;   s&    

 




zPhylogeneticTree.map_seq_namesc                 C   s   | j S r   )r   r   r   r   r   rename_script_   s    zPhylogeneticTree.rename_scriptc           	      C   s   |  }|rP|d |d }|D ]"}||k	r8|d | || q"|d | s^| jsr|d|   n|| jkr|d| j|   |j}|r|j}|d k	rd}zt|}t	|}W n t
k
r   t	|}Y nX |r|d||jf  d S )N(r   ,)z%sr    z:%s[%s])child_nodeswritewrite_newick_nodeis_leafr   Z_get_node_tokenr   r   floatr0   
ValueErrorr,   )	r   nodeoutr:   Zf_childZchildeZselsr   r   r   r<   b   s2    



z"PhylogeneticTree.write_newick_nodec                 C   s   | j S r   )r   r5   r   r   r   get_tree~   s    zPhylogeneticTree.get_treec                 C   s   t | j S r   )lenr   
leaf_nodesr5   r   r   r   r      s    zPhylogeneticTree.count_leavesc                 C   s   t | j S r   )rE   r   Znodesr5   r   r   r   count_nodes   s    zPhylogeneticTree.count_nodesc                 C   sB   | j  D ]2}|j}| r$d|_q
tdd | D |_q
d S )Nr   c                 S   s   g | ]}|j jqS r   r   num_leaves_belowr#   jr   r   r   
<listcomp>   s   z0PhylogeneticTree.calc_splits.<locals>.<listcomp>)r   postorder_edge_iter	head_noder=   rI   sumr:   )r   r1   Zndr   r   r   calc_splits   s    zPhylogeneticTree.calc_splitsc                 C   s>   | j j}| }t|dks t| d j}|d k	s:t|S )N   r   )r   r   r:   rE   r   r   )r   minSizerootroot_childrenZ
clade_edger   r   r   get_clade_edge   s    zPhylogeneticTree.get_clade_edgec                 C   s   | j j}| }|r6t|d jds6|   |  }n"|jrF|jj}ntdd |D }d}|}||krldS |d }| j 	 D ]f}|j
dkrq~|j}	|	dkr|dk	r|	|k s~| j|	 |k rq~t||	 }
|
|k r|}|
}|dk	s~tq~|S )zGet centroid edger   rI   c                 S   s   g | ]}|j jqS r   rH   rJ   r   r   r   rL      s   z6PhylogeneticTree.get_centroid_edge.<locals>.<listcomp>NrQ   r   )r   r   r:   hasattrr   rP   r   rI   rO   rM   r   r   absr   )r   rR   rS   rT   r   Zcentroid_edgeZcentroid_imbalanceZ	half_taxar   Zn_descendantsZ	imbalancer   r   r   get_centroid_edge   sF    


z"PhylogeneticTree.get_centroid_edgec                 C   s   d }d}| j  D ]\}|jd kr"qt|j }|d k	rP||k s| j| |k rPq|jd k	r|j|kr|}|j}q|d k	s|t|S )Ng      )	r   rM   r   rE   rN   rF   r   r   r   )r   rR   Zlongest_edgeZlongest_lenr   ZonesideSizer   r   r   get_longest_edge   s$    
z!PhylogeneticTree.get_longest_edgec                    s>    fdd j  D } fdd j D }|| |S )Nc                    s   g | ]}| k	r|qS r   r   r#   r1   rB   r   r   rL      s      z7PhylogeneticTree.get_adjacent_edges.<locals>.<listcomp>c                    s   g | ]}| k	r|qS r   r   rZ   r[   r   r   rL      s      )rN   Zget_incident_edgesr   extend)r   rB   ZheZter   r[   r   get_adjacent_edges   s    
z#PhylogeneticTree.get_adjacent_edgesc                 C   sR   |  dkr| |S |  dkr,| |S |  dkrB| |S td| d S )NcentroidZlongestcladezUnknown break strategy "%s")lowerrX   rY   rU   r?   )r   ZoptionrR   r   r   r   get_breaking_edge   s    


z"PhylogeneticTree.get_breaking_edgec                 C   sj   | j dkrdS | jj}|jd }| j}|j|ddd t|}tt|}|j dkr`|j| |||fS )Nr   NNNr   T)Zupdate_splitsZdelete_outdegree_one)r   r   r   Z_child_nodesZprune_subtreer   r   Zreroot_at_node)r   rS   Zt1_roottt1t2r   r   r   bipartition_by_root   s    


z$PhylogeneticTree.bipartition_by_rootc           
      C   sZ  | j }|j}|jdk	st|jdk	s(t|j|jks8tt| |j}|j}|jj|dd d|j_d|_t	| t
t|d}|j}t|dr|dkr|}|jr|j j|8  _n\|| kr|j j|8  _|}|jr|j j|8  _|jr0|j}|jr|j j|8  _qn|p|}|jr0|j}qt
t|d}	t|j  t|	j  ||	fS )zkPrunes the subtree that attached to the head_node of edge e and
           returns them as a separate tree.NT)Zsuppress_unifurcations)r   rI   )r   rN   r   r   Zparent_nodeis_valid_treeZremove_childr   r   convert_node_to_root_polytomyr   r   r   rV   rI   r:   )
r   rB   rc   ZnrZpotentially_deleted_ndZgrandparent_ndrd   Zn1Zold_rootre   r   r   r   bipartition_by_edge   sF    



z$PhylogeneticTree.bipartition_by_edgec                 C   s   | j  }dd |D S )Nc                 S   s   g | ]}|j jqS r   )Ztaxonr,   rZ   r   r   r   rL   '  s     z4PhylogeneticTree.leaf_node_names.<locals>.<listcomp>)r   rF   )r   Zleavesr   r   r   leaf_node_names%  s    
z PhylogeneticTree.leaf_node_namesFc                 C   sF   |s| j jdd| dS t }| | j j| | }|  |S d S )NZnewickT)schemaZsuppress_rootingZsuppress_internal_node_labels)r   Z	as_stringr   r<   r   getvalueclose)r   labelsZstringIOZretr   r   r   compose_newick)  s     zPhylogeneticTree.compose_newickc                 C   s.   t |d}||   |d |  d S )Nwr    )openr;   ro   rm   )r   pathZtree_handler   r   r   write_newick_to_path5  s    

z%PhylogeneticTree.write_newick_to_pathc                 C   s<   t  }|jt|d|d |jd d }|| _|  | _d S )NZrU)rk   r   )Datasetreadrq   Ztrees_blocksr   r   r   )r   ZtreefileZfile_formatZdatasetr   r   r   r   read_tree_from_file;  s
    z$PhylogeneticTree.read_tree_from_filec                 C   sT   t |dkrd S t| j}t|d tr4|| nt|d trL|| t|S )Nr   )	rE   r   r   r   r0   Zprune_taxa_with_labelsr   Z
prune_taxar   )r   ZtaxaZtreer   r   r   get_subtreeB  s    

zPhylogeneticTree.get_subtreer^   Nc                 C   s   | j }|dkrJ| ||}|dkr&dS td|j|f  | |\}}n|  \}}}td|j |j f  ||j |j  kst|||fS )z(Partition 'tree' into two parts
        r_   Nrb   zbreaking_edge length = %s, %sz(Tree 1 has %s nodes, tree 2 has %s nodes)r   ra   _LOGdebugr   ri   rf   r   )r   Zbreaking_edge_stylerR   ZsnlrB   Ztree1Ztree2r   r   r   bisect_treeL  s"      zPhylogeneticTree.bisect_treeZnormalr   c	              	   C   s6  |dkr>t | j||||d}	t|	D ]\}
}t|||
< q$|S |dkrR| j  n| jjdkrh| j  |dkr|  |krt	| |t
|< |  |ks|dkr&t||  |kr&| ||\}}}|dk	r|||||||| |||||||| n"| |t
|< td||| jf  n| |t
|< |S )	ao  
        This function decomposes the tree until all subtrees are smaller than
        the max size, but does not decompose below min size.
        Two possible decompositions strategies can used: "centroid" and
        "longest".
        Returns a map containing the subtrees, in an ordered fashion.

        SIDE EFFECT: deroots the tree (TODO: necessary?)
        )Zmidpointr^   )strategyZmax_sizeZmax_diamZmin_sizer_   FZhierarchicalr   NzbIt was not possible to break-down the following tree according to given subset sizes: %d , %d:
 %s)r
   r   	enumerater   Zderoot	is_rootedZreroot_at_midpointr   copydeepcopyrE   r	   rj   rz   decompose_treerx   Zwarning)r   ZmaxSizer{   rR   Ztree_mapZdecomp_strategyZ	pdistanceZ	distancesZmaxDiamTr1   rc   rd   re   rB   r   r   r   r   a  sb       



      zPhylogeneticTree.decompose_treec                 C   s&   d}| j  D ]}||_|d7 }qd S )Nr   r   )r   rM   r,   )r   ZenrB   r   r   r   lable_edges  s    zPhylogeneticTree.lable_edgesc           	      K   sj   i }t | j}t| jjD ].\}}d|kr:|d |s:q|||||j< qt|d| }dd |D S )NZfilterTaxonr   c                 S   s   g | ]}|d  qS )r   r   )r#   r@   r   r   r   rL     s     z.PhylogeneticTree.branchOut.<locals>.<listcomp>)r   ZPatristicDistanceMatrixden_treer|   Z	taxon_setr,   r   )	r   ZcenterTaxonZ
subsetSizekwargsZdistZpdmr1   rC   Zincircler   r   r   	branchOut  s    zPhylogeneticTree.branchOut)T)F)r^   N)__name__
__module____qualname____doc__r   r   r6   r<   rD   propertyr   r   rG   rP   rU   rX   rY   r]   ra   rf   ri   rj   ro   rs   rv   rw   rz   r   r   r   r   r   r   r   r   -   s>   
$	&
1


  
8	r   c                 C   s   t t| S r   )r0   id)r3   r   r   r   node_formatter  s    r   c                 C   s   dt t| | jf S )Nz%s %f )r0   r   r   r[   r   r   r   edge_formatter  s    r   c                 C   sx   | j r
dS | r| st| j }t|}|dkr4dS |dkrP|d  rLtdS |dkrt|d  sp|d  rttdS )NTr   r   rQ   )r}   r   r   r:   rE   )rc   ZrcZnum_childrenr   r   r   rg     s    
rg   )r   Zdendropyr   r   r   r   rt   Zdendropy.datamodel.treemodelr   rh   Zseppr   r   Zsepp.alignmentr	   Zsepp.decompose_treer
   r   ImportErrorior~   r"   r!   r-   r   rx   objectr   r   r   rg   r   r   r   r   <module>   s,      