
    瞤dG\                     .   d Z ddlmZ ddlmZ ddlmZ ddlmZm	Z	m
Z
 ddlmZ dd	lmZmZ dd
lmZ ddlZddlZddlmZ dZdZdZdZdZ	 ddlmZ d Zn# e$ r	 ddlmZ Y nw xY w	 ddlm Z  dZ!n# e$ r dZ!Y nw xY wddgZ"d Z# G d de          Z$e$Z%dS )z
this module defines the EvolNode dataytype to manage evolutionary
variables and integrate them within phylogenetic trees. It inheritates
the coretype PhyloNode and add some speciall features to the the node
instances.
    )absolute_import   )which   )	translate)ModelPARAMSAVAIL)write_newick)	PhyloNodeSeqGroup)warnN)mapzFrancois-Jose Serrazfrancois@barrabin.orgGPLv3z0.0a  
Yang, Z., Nielsen, R., Goldman, N., & Pedersen, A. M. 2000.
    Codon-substitution models for heterogeneous selection pressure at amino acid sites.
    Genetics 155: 431-49.
    Retrieved from http://www.pubmedcentral.nih.gov/articlerender.fcgi?artid=1461088&tool=pmcentrez&rendertype=abstract
Yang, Z., & Nielsen, R. 2002.
    Codon-substitution models for detecting molecular adaptation at individual sites along specific lineages.
    Molecular biology and evolution 19: 908-17.
    Retrieved from http://www.ncbi.nlm.nih.gov/pubmed/12032247
Bielawski, J. P., & Yang, Z. 2004.
    A maximum likelihood method for detecting functional divergence at individual codon sites, with application to gene family evolution.
    Journal of molecular evolution 59: 121-32.
    Retrieved from http://www.ncbi.nlm.nih.gov/pubmed/15383915
Zhang, J., Nielsen, R., & Yang, Z. 2005.
    Evaluation of an improved branch-site likelihood method for detecting positive selection at the molecular level.
    Molecular biology and evolution 22: 2472-9.
    Retrieved from http://www.ncbi.nlm.nih.gov/pubmed/16107592
Yang, Z. 2007.
    PAML 4: phylogenetic analysis by maximum likelihood.
    Molecular biology and evolution 24: 1586-91.
    Retrieved from http://www.ncbi.nlm.nih.gov/pubmed/17483113
)chi2c                 2    dt          j        | |          z
  S )Nr   )r   cdf)xys     2lib/python3.11/site-packages/ete3/evol/evoltree.pychi_highr   W   s    q48Aq>>11    )r   )	TreeStyleTFEvolNodeEvolTreec                     | dd         S )z;
    just to return specie name from fasta description
    N    names    r   _parse_speciesr!   e   s     8Or   c            
       (    e Zd ZdZdddeddfdZd Zd Zd"d	Zd
 Z	d Z
d Zd#dZdZej        e                    d  e e ej                              d d          D                       d                     e ej                                        fz  e_        	 	 d$ fd	Zd% fd	Z	 	 d& fd	Zd'dZd Zd Zd(dZexj         ee e                      j        j                            dd          z  c_        d  Zd'd!Z xZ S ))r   a   Re-implementation of the standart TreeNode instance. It adds
    attributes and methods to work with phylogentic trees.

    :argument newick: path to tree in newick format, can also be a string
    :argument alignment: path to alignment, can also be a string.
    :argument fasta alg_format: alignment format.
    :argument sp_naming_function: function to infer species name.
    :argument format: type of newick format
    :argument binpath: path to binaries, in case codeml or SLR are not in global path.

    Nfastar    c                 ,   d| _         |s4t          d          }t          j                            |          d         }|| _        i | _        d| _        t          j	        | f|||d| |r| 
                                 |                     g            dS )zM
        freebranch: path to find codeml output of freebranch model.
        z/tmp/ete3-tmp/ete3r   F)newickformatsp_naming_functionN)workdirr   ospathsplitexecpath_models_EvolNode__gui_mark_moder   __init___label_as_paml	mark_tree)	selfr'   	alignment
alg_formatr)   r(   binpathkwargs	ete3_paths	            r   r1   zEvolNode.__init__y   s     ( 	2fIgmmI..q1G$4 	Lv.@	L 	LDJ	L 	L 	L  	"!!!rr   c                     || _         d S Nr0   )r4   vals     r   _set_mark_modezEvolNode._set_mark_mode   s    "r   c                     | j         S r;   r<   )r4   s    r   _is_mark_modezEvolNode._is_mark_mode   s    ##r   c                     |                                  D ]X}|                                r|dxx         dz  cc<   |                    d|d                    |                    |           YdS )zS
        nid needs to be a list in order to keep count through recursivity
        r   r   node_idN)get_childrenis_leafadd_feature_label_internal_nodes)r4   nidnodes      r   rF   zEvolNode._label_internal_nodes   s     %%'' 	, 	,D||~~ FFFaKFFFYA///&&s++++	, 	,r   c                     d}t                     t          t                                                               k    r0 fd                                 D             }t          d|          t	           d           D ]}|                    d|           |dz  }                     d|                                |g           dS )a  
        to label tree as paml, nearly walking man over the tree algorithm
        WARNING: sorted names in same order that sequence
        WARNING: depends on tree topology conformation, not the same after a swap
        activates the function get_descendants_by_pamlid
        r   c                 j    g | ]/}                                                     |          d k    -|0S )r   )get_leaf_namescount).0nr4   s     r   
<listcomp>z+EvolNode._label_as_paml.<locals>.<listcomp>   sG     5 5 5A$$&&,,Q//!33 333r   z(EvolTree require unique names for leavesc                     | j         S r;   r   r   s    r   <lambda>z)EvolNode._label_as_paml.<locals>.<lambda>   s    qv r   )keyrB   N)lensetrK   	ExceptionsortedrE   rF   )r4   rG   duplisleafs   `   r   r2   zEvolNode._label_as_paml   s     II#c$"5"5"7"78899995 5 5 5!4!4 " " 5 5 5FFOOO4%5%5666 	 	DY,,,1HCCC(((""C5)))))r   c                     	 |                                  D ]}|j        |k    r|c S | j        |k    r| S dS # t          $ r t          d           Y dS w xY w)zC
        returns node list corresponding to a given idname
        zNShould be first labelled as paml (automatically done when alignemnt is loaded)N)iter_descendantsrB   AttributeErrorr   )r4   idnamerN   s      r   get_descendant_by_node_idz"EvolNode.get_descendant_by_node_id   s    	B**,,  9&&HHH '|v%% &% 	B 	B 	B A B B B B B B	Bs   $7 7 AAc                     t                      }| D ]>}|j        |j        |j        <   |j        |j        |j        <   |j        |j        |j        <   ?|                    |d           dS )z.
        to write algn in paml format
        pamloutfiler(   N)r   nt_sequenceid2seqrB   r    id2namename2idwrite)r4   fullpath	seq_grouprN   s       r   _write_algnzEvolNode._write_algn   sn     JJ	 	2 	2A*+-IQY'+,6Iai(()	Iaf%%88888r   Tc                    ddl m}m} t          || fi |}t          j                            | j        |j                  }t	          j	        d|z             | 
                    |dz              |j        d         dk    r|                     |dz   d	           n)|                     |dz   |j        d
         rdnd	           |dk    r|                    |dz             }n&t          |dz   d                              |           t	          j                    }	t	          j        |           t          j                            | j        |j        d                   }
	  ||
dg|||          }n0# t$          $ r# t'          d                    |
                    w xY w|                    d          \  }}|                    t.          j        j                  }t	          j        |	           |rt5          d|z              dS |rGt7          |d|           |                     t          j                            |d          |           dS dS )a  
        To compute evolutionnary models.     e.g.: b_free_lala.vs.lele, will launch one free branch model, and store
        it in "WORK_DIR/b_free_lala.vs.lele" directory

        WARNING: this functionality needs to create a working directory in "rep"

        WARNING: you need to have codeml and/or SLR in your path

        The models available are:

        =========== ============================= ==================
        Model name  Description                   Model kind
        =========== ============================= ==================
%s
        =========== ============================= ==================


        **Note that M1 and M2 models are making reference to the new versions
        of these models, with continuous omega rates (namely M1a and M2a in the
        PAML user guide).**

        :argument model_name: a string like "model-name[.some-secondary-name]" (e.g.: "fb.my_first_try", or just "fb")
                              * model-name is compulsory, is the name of the model (see table above for the full list)
                              * the second part is accessory, it is to avoid over-writing models with the same name.
        :argument ctrl_string: list of parameters that can be used as control file.
        :argument True keep: links the model to the tree (equivalen of running `EVOL_TREE.link_to_evol_model(MODEL_NAME)`)
        :argument kwargs: extra parameters should be one of: %s.
        r   )PopenPIPEzmkdir -p %sz/algnexecSlrz/tree   ra   
allow_mark
   	   r$   z/tmp.ctlwztmp.ctl)stdoutstdinstderrz1ERROR: {} not installed, or wrong path to binary
   
zERROR: inside codeml!!
r   runoutN)
subprocessrl   rm   r   r+   r,   joinr*   r    systemrj   
propertiesrg   get_ctrl_stringopengetcwdchdirr.   OSErrorrV   r(   communicatedecodesysru   encodingr   setattrlink_to_evol_model)r4   
model_namectrl_stringkeepr8   rl   rm   	model_objrh   hlddirbin_procry   errs                 r   	run_modelzEvolNode.run_model   s   6 	+********d55f55	7<<in==
	-(*+++G+,,,'500JJx/J====JJx/%.%9,%GNrrQ  Q Q Q "#33HZ4GHHKK*$c**00===
w||DM9+?+GHH	H5$	*4#D2 2 2DD 	H 	H 	H 9:@&,,H H H	H ##E**Sjj,--
 	+c12221 	NIuc***##BGLL5$A$A9MMMMM	N 	Ns   "E4 4-F!
c           	      v    g | ]6}d d|z  ddt           |         d         ddt           |         d         dd7S )	z
          z%s8z   evol27typ15z  r
   )rM   r   s     r   rO   zEvolNode.<listcomp>  sa     b b bGHqDAJJJJa 0 0 0 0%(5////C b b br   c                 (    t           |          d         S )Nr   r   rQ   s    r   rR   zEvolNode.<lambda>  s    SXYZS[\aSb r   )rS   reversez, r`   c                 \    t          t          |           j        |fd|i| d}|                                 D ]p}t	          t          |j                            }|r||k    rt          d           |}t          |j                  |_        |rt          |j                  |_        qdS )a`  
        same function as for phyloTree, but translate sequences if nucleotides
        nucleotidic sequence is kept under node.nt_sequence

        :argument alignment: path to alignment or string
        :argument alg_format: one of fasta phylip or paml
        :argument True alignment: set to False in case we want to keep it untranslated

        r6   r   z0WARNING: sequences with different lengths found!N)
superr   link_to_alignmentiter_leavesrT   strsequencer   rc   r   )	r4   r5   r6   nucleotidesr8   	check_lenrY   seq_len	__class__s	           r   r   zEvolNode.link_to_alignment"  s     	0h/	 	Q 	Q;E	QIO	Q 	Q 	Q	$$&& 	< 	<D#dm,,--G IY'11GHHHI"4=11D < )$*: ; ;	< 	<r   c                    t           r>|st                      }n|}|r|D ]}	 |                     |          }n"# t          $ r t	          d|z             Y nw xY wd|j        vrWt          |          dk    r0|                    |          dk    r|                    d           n|                                 |j        d         j	        r'|j
                            |j        d         d           |j                            |j        d         d           t          t          |                               ||           d	S t!          d          )
aJ  
        call super show of PhyloTree
        histface should be a list of models to be displayes as histfaces

        :argument layout: a layout function
        :argument None tree_style: tree_style object
        :argument Nonehistface: an histogram face function. This is only to plot selective pressure among sites

        model %s not computedhistfacer   r   Fup)layout
tree_styleTreeview module is disabledN)TREEVIEWr   get_evol_modelr\   r   r~   rT   indexset_histfacer   aligned_headeradd_facealigned_footr   r   show
ValueError)r4   r   r   	histfacestshistmdlr   s          r   r   zEvolNode.show9  s     	<  [[ ;% ; ;D?"11$77) ? ? ?4=>>>>>?%77y>>A--)//$2G2G12L2L,,,6666,,...~j14 ;)22N:6; ; ; ; 00N:6; ; ; ;(D!!&&f&DDDDD:;;;   :AAc                    t           r?|st                      }n|}|r|D ]}		 |                     |	          }
n"# t          $ r t	          d|	z             Y nw xY wd|
j        vrWt          |          dk    r0|                    |	          dk    r|
                    d           n|
                                 |
j        d         j	        r'|j
                            |
j        d         d           |j                            |
j        d         d           t          t          |                               |||||          S t!          d          )	a  
        call super show adding up and down faces

        :argument layout: a layout function
        :argument None tree_style: tree_style object
        :argument Nonehistface: an histogram face function. This is only to plot selective pressure among sites

        r   r   r   r   Fr   )r   r   rt   hr   )r   r   r   r\   r   r~   rT   r   r   r   r   r   r   r   r   renderr   )r4   	file_namer   rt   r   r   headerr   r   r   r   r   s              r   r   zEvolNode.render]  s     	<  [[ ;% ; ;D?"11$77) ? ? ?4=>>>>>?%77y>>A--)//$2G2G12L2L,,,6666,,...~j14 ;)22N:6; ; ; ; 00N:6; ; ; ;4((//	&;=23q 0 : : : :;;;r   Fc           	         ddl m} t          t          t          |                    }d|v rt          |d                   }ndgt          |          z  }|                                 D ]}t          |d          s|j        |v rd||	                    |j                           v s+ |d||	                    |j                                     	 |rt          d	| j        j        z              |                    d
d||	                    |j                           z              d
|j        vr|                    d
d           dS )a  
        function to mark branches on tree in order that paml could interpret it.
        takes a "marks" argument that should be a list of #1,#1,#2
        e.g.:
        ::

          t=Tree.mark_tree([2,3], marks=["#1","#2"])

        :argument node_ids: list of node ids (have a look to node.node_id)
        :argument False verbose: warn if marks do not correspond to codeml standard
        :argument kargs: mainly for the marks key-word which needs a list of marks (marks=['#1', '#2'])

        r   )matchmarksz#1rB   .z#[0-9]+Nz?WARNING: marks should be "#" sign directly followed by integer
mark r$   )rer   listr   intrT   traversehasattrrB   r   r   r3   __doc__rE   features)r4   node_idsverbosekargsr   r   rH   s          r   r3   zEvolNode.mark_tree  s    	C**++ew((EEF3x==(EMMOO 	- 	-D4++ |x''5!=!=>>>E)t| < <=? ?BFGLSG 137>3IJ K K K  ChnnT\&B&B CCE E E Et},,  ,,,	- 	-r   c                    t          |t                    rt          || |          }n|                    |           |j        | j        v r||j                            d          d         t          d|j        v r0t          |j                            d          d                   dz   nd          z   |_        |j        | j        v ||| j        |j        <   t          j	        
                    |          st          d|z              dS t          | j                  dk    r+|j        d         dk    r|                     d|d	           d
S d
S d
S )a  
        link EvolTree to evolutionary model
          * free-branch model ("fb") will append evol values to tree
          * Site models (M0, M1, M2, M7, M8) will give evol values by site
            and likelihood

        :argument path: path to outfile containing model computation result
        :argument model: either the name of a model, or a Model object (usually empty)

        __r   r   zERROR: not a file: rn   codemlbLT)fillN)
isinstancer   r   _loadr    r/   r-   r   r+   r,   isfiler   rT   r~   change_dist_to_evol)r4   r,   models      r   r   zEvolNode.link_to_evol_model  sW    eS!! 	%t,,EEKKjDL(())$//2S5:%% UZ%%d++A.//!33+,6. 6. .EJ jDL(( $)UZ w~~d## 	&-...1t|!!e&6v&>(&J&J$$T5t$<<<<< "!&J&Jr   c                 d    	 | j         |         S # t          $ r t          d|z             Y dS w xY w)z
        returns one precomputed model

        :argument modelname: string of the name of a model object stored
        :returns: Model object
        zERROR: Model %s not found.N)r/   KeyErrorrV   )r4   	modelnames     r   r   zEvolNode.get_evol_model  sQ    	B<	** 	B 	B 	B2i@AAAAAA	Bs    //rr   c           
      v   ddl m} t          |          dk    r4dt          |           z  }| |ddt	          | dgd	                    z  }nDt          |          d
k    r |ddt	          | dgd	                    }nt	          | ||	          }|%t          |d                              |           |S |S )z
        Inherits from Tree but adds the tenth format, that allows to display marks for CodeML.
        TODO: internal writting format need to be something like 0
        r   )subrp   z %s 1
z\[&&NHX:mark=([ #0-9.]*)\]z\1r   rs   )r   r(   rr   Nrt   )r   r   r   rT   r   r   rg   )r4   r   rb   r(   r   nwks         r   rg   zEvolNode.write  s    
 	v;;"s4yy)C333U#DF8AFFFH H HCC[[B#2E"46(1EEEG GCC thvFFFC#$$S)))JJr   zargument formatzargument 10 formatc                    |                      |          }|                      |          }|j        |j        k    rt          d           dS 	 t          |d          rt          |d          r|j        |j        z
  dk     rGt          dt          |j        |j        z
            z  t          |j        |j        z
                      S t          d|j        |j        |j        |j        z
  fz  dz              dS dS dS # t          $ r= t          d	|j	        d
|j	        d           t          | j        j                   Y dS w xY w)ad  
        Returns pvalue of LRT between alternative model and null model.

        usual comparison are:

        ============ ======= ===========================================
         Alternative  Null    Test
        ============ ======= ===========================================
          M2          M1      PS on sites (M2 prone to miss some sites)
                              (Yang 2000).
          M3          M0      test of variability among sites
          M8          M7      PS on sites
                              (Yang 2000)
          M8          M8a     RX on sites?? think so....
          bsA         bsA1    PS on sites on specific branch
                              (Zhang 2005)
          bsA         M1      RX on sites on specific branch
                              (Zhang 2005)
          bsC         M1      different omegas on clades branches sites
                              ref: Yang Nielsen 2002
          bsD         M3      different omegas on clades branches sites
                              (Yang Nielsen 2002, Bielawski 2004)
          b_free      b_neut  foreground branch not neutral (w != 1)
                               - RX if P<0.05 (means that w on frg=1)
                               - PS if P>0.05 and wfrg>1
                               - CN if P>0.05 and wfrg>1
                               (Yang Nielsen 2002)
          b_free      M0      different ratio on branches
                              (Yang Nielsen 2002)
        ============ ======= ===========================================

        **Note that M1 and M2 models are making reference to the new versions
        of these models, with continuous omega rates (namely M1a and M2a in the
        PAML user guide).**

        :argument altn: model with higher number of parameters (np)
        :argument null: model with lower number of parameters (np)

        z7first model should be the alternative, change the orderg      ?lnLr   r   zS
WARNING: Likelihood of the alternative model is smaller than null's (%f - %f = %f)z[
Large differences (> 0.1) may indicate mistaken assigantion of null and alternative modelsr   zat least one of z or z, was not calculatedN)r   npr   r   r   r   absfloatr   r    exitget_most_likelyr   )r4   altnnulls      r   r   zEvolNode.get_most_likely  s   P ""4((""4((7TWJKKK3	/tU## 
e(<(< 
8dh&**#ADHtx,?(@(@$@$)$'DG*;$<$<> > >  >!XtxDH1DAFFFF G G G
 1
 
 
 
  	/ 	/ 	/D4999CG999N O O O%-......	/s   A9C< /C< <AEEc                    |j         sdS |                                 D ]g}||j         |j                 vr|j         |j                 |         |_        |r1dD ].}|                    ||j         |j                 |                    /hdS )a  
        change dist/branch length of the tree to a given evolutionary
        variable (dN, dS, w or bL), default is bL.

        :argument evol: evolutionary variable
        :argument model: Model object from which to retrieve evolutionary variables
        :argument False fill: do not affects only dist parameter, each node will be annotated with all evolutionary variables (nodel.dN, node.w...).
        N)dNdSrt   r   )branchesr[   rB   distrE   )r4   r   r   r   rH   es         r   r   zEvolNode.change_dist_to_evol!  s     ~ 	F))++ 	I 	ID5>$,777t|4T:DI I0 I IA$$Qt|(DQ(GHHHH	I 	Ir   r;   )r$   T)r`   T)NNN)NNNNNN)F)NNrr   )!__name__
__module____qualname__r   r!   r1   r>   r@   rF   r2   r^   rj   r   sepr|   rW   r
   keysr   r	   r   r   r   r3   r   r   rg   r   r   replacer   r   __classcell__)r   s   @r   r   r   l   s       
 
 #dw$21   0# # #$ $ $	, 	, 	, 	,* * *(B B B	9 	9 	9?N ?N ?N ?N@ C!)	 b bLRFSYSYZdZ_ZdZfZfSgSg nc nc[_Ma Ma Mab b b 
c 
c 
44&&	'	'		))I$ 7=&*< < < < < <."< "< "< "< "< "<H 8<7;$< $< $< $< $< $<L -  -  -  -D= = =6
B 
B 
B   ( 
MMUU9iikk228@HH/1 1 1MM</ </ </|I I I I I I I Ir   )&r   
__future__r   tools.utilsr   utilsr   r   r   r	   r
   parser.newickr   r$   r   r   warningsr   r   r+   	six.movesr   
__author__	__email____licence____version____references__scipy.statsr   r   ImportErrortreeviewr   r   __all__r!   r   r   r   r   r   <module>r     s  P  ' & & & & &             ' ' ' ' ' ' ' ' ' ' ( ( ( ( ( ( " " " " " " " "       



 				      "
#	0       1111       $$$$$$ HH    HHH
 z
"  GI GI GI GI GIy GI GI GIV s$   	A A$#A$(A1 1A;:A;