
    瞤d4                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ dZdZ	d	Z
d
ZddlZddlmZ d Zd Zd Zd Zd Zd Zd ZdS )zK
ugly parsers for outfiles of codeml, rst file for sites,
and main outfile
    )absolute_import)print_function)map)filter)rangezFrancois-Jose Serrazfrancois@barrabin.orgGPLv3z0.0N)warnc           	      	  
 d}i }i }i d}d}d
d                     |                     d          dd                   dz   } t          |           D ]X}|                    d          r$t	          t          j        dd	|                    }<|t          j        d
t          |          z   dz   |          rit          j        dd|                    d          d                   }|                    d          rd}d t          j	        d|          D             ||<   d|v r1t	          t          j        dd	|                    }d}i ||<   ||<   d|v r1t	          t          j        dd	|                    }d}i ||<   ||<   9|                    d          rd}t          j        d|          r|i|
                    dd          }t          j        dd|                                                                          }||                             dg                               |d                    g }t          |          D ]}|                    t          |d|z                                 ||                             dt          |          z   g                               t          |d|z                                 ||                             d g                               t!          |                     t	          |d!|z                      }	||                             d"g                               |	           	 ||                             d#g                               t          |d$|z                                 n# t"          $ r |dd%         t%          |d%d                   gz   }t!          |          ||         d          d<   d&
	 ||                             d#g                               |d'         |	dz
                      n# t&          $ r ||         d#= Y nw xY wY nw xY w	 ||                             d(g                               t          |d)|z                                 @# t"          $ r ||         d(= Y Vw xY w||
fd*D             d+S ),z
    parse rst files from codeml, all site, branch-site models.
    return 2 dicts "classes" of sites, and values at each site "sites"
    Nr   F//rstzdN/dS z.* \(K=([0-9]+)\)
\1z^[a-z]+.*(\d+\.\d{5} *){}:   pproportionsc                 ,    g | ]}t          |          S  )float).0vs     =lib/python3.11/site-packages/ete3/evol/parser/codemlparser.py
<listcomp>zparse_rst.<locals>.<listcomp>R   s    MMME!HHMMM    z
\d+\.\d{5}z(BEB)z.*for (\d+) classes.*
BEBz(NEB)NEBzPositively z^ *[0-9]+ [A-Z*-] z +-  z[()]aa      pv   classw   Tzforeground wse   c                 (    i | ]}||         z
  S r   r   )r   kbsa	n_classess     r   
<dictcomp>zparse_rst.<locals>.<dictcomp>   s$    CCCA!Yq\C/CCCr   )classessitesr.   )joinsplitopen
startswithintresubmatchstrfindallreplacestrip
setdefaultappendr   r   max
IndexErrorsumKeyError)pathtypr0   r1   r,   ilinevarprobsclasser-   r.   s             @@r   	parse_rstrK   :   s   
 CGEIAAC88DJJsOOCRC())F2DT

 ?$ ?$??H%% 	BF15$??@@A;8/Q7<dCC &#r4::d#3#3A#677C~~s## $#MMbjt.L.LMMMGCLd??BF4eTBBCCACE#JIcNd??BF4eTBBCCACE#JIcN??M** 	Cx-t44 	||FC((vvr4::<<006688c
tR((//a999q 	N 	NALL%ac++,,,3K""3Q<44;;E$qs)<L<LMMMMsb))00#e**===dAEl##c
w++226:::	'3K""3,,33E4Q<4H4HIIII 
	' 
	' 
	' #2#J#eBCCj//!22E#&u::E#JtR C'c
%%cB//66N+FQJ79 9 9 9 ' ' 'C[%%%'
	'	$3K""D"--44eT1q5\6J6JKKKK 	$ 	$ 	$T"""	$CCCCCCCCE E EsK   ANAQ->P,+Q,Q?QQQQARR*)R*c                    t          dt          |j        d         d                             D ]}|j        dz   t	          |          z   |_        t          | dz   t	          |          z   d          }d}t          |           D ]}|dk    r+|                    dt	          |          z   dz             rd	}3|d	k    r-|                    dt	          |dz             z   dz             r n|d	k    r|                    |           |                                 |dk    rt          d
           |j
        dk    rAd                    |                     d          dd                   dz   }t          |dz   t	          |          z   d          }d}t          |          D ]}|dk    r+t          j        dt	          |          z   dz   |          d	}3|d	k    r-t          j        dt	          |dz             z   dz   |          d}|d	k    r|                    |           |                                 t          |dt	          |          z   t!          | dz   t	          |          z   |                     Nt          |dt	          |          z   t!          | dz   t	          |          z   |                     dS )z/
    for multiple dataset, divide outfile.
    r!   paramsndata_r&   Fz	Data set 
Tz\WARNING: seems that you have no multiple dataset here...
    trying as with only one datasetsiter   Nr   r   	data_)r   r6   
propertiesnamer:   r4   r5   writecloser	   rE   r2   r3   r7   r9   setattr
parse_paml)pamoutmodelnumoutcopyrG   rstrstouts           r   divide_datara      s    ae.x8ABBCC %B %BZ#%C0
FSL3s88+S11&MM 		  		 Du}}??;S		#9D#@AA t||??;SU#;d#BCC t||		$		5== < = = =9((FLL--crc233F:C39s3xx/55FDS		 	' 	'5==xsCyy 0 5t<<HD4<<xsC!G}} 4T 94@@L D4<<LL&&&LLNNNUGc3ii/#C!8%@@B B B B UGc3ii/#C!8%@@B B B BI%B %Br   c           
         t          d                    |                     d          dd                   dz             D ]B}|                    d          r~t	          j        ddt	          j        dd	|                                        d
          \  }}|j                            t          |                    }|	                    d|           |                    d          rt	          j        ddt	          j        dd	|                                        d
          \  }}|j                            t          |                    }|	                    d|           *|                    d          r dS DdS )z[
    only for fb_ancestor model, retrieves ancestral sequences also
    from rst file.
    r   Nr   r   znode #znode#([0-9]+)([A-Z]*)
z\1	\2r   r   rR   nt_sequencezNode #zNode#([0-9]+)([A-Z]*)
sequencezCounts of changes at sites)
r4   r2   r3   r5   r7   r8   _treeget_descendant_by_node_idr6   add_feature)rZ   r[   rG   pamlidseqns         r   get_ancestorrk      sj   
 chhS 1 1#2# 677>??  ??H%% 	&";Z"$&#r4"8"8: ::?%,, FC55sF||DDAMM=#....__h'' 	&";Z"$&#r4"8"8: ::?%,, FC55sF||DDAMM:s++++__:;; 	EE	 r   c                    dt          |j        d         d                   vrt          | |           dS t          |                                           }|j        [ddlm}  |t          j	        dd
                    |                    d                   |_        |j                                         t          |          D ]\  }}|d	k    r|                    d
          rtg |j        d<   t          d          D ]Y}t!          t#          t$          t          j	        d|||z   dz                                          }|j        dxx         |gz  cc<   Z|                    d          r
g |j        d<   d|j        vr|                                }|                    d          r	 t          j        dd|          }t+          |                                d                   |j        d<   t%          |                                d                   |j        d<   nl# t.          $ r_ t          j        dd|          }t+          |                                d                   |j        d<   t%          d          |j        d<   Y nw xY w|                    d          dk    r1t          j	        d|dz             }t3          |j        || |           |                    d          rG	 t%          t          j        dd|                    |j        d<   n# t.          $ r d|j        d<   Y nw xY w|                    d          dk    r|                    d          r~t          j        d|          sWt          j        d ||dz                      r7t7          ||                                d         d!z   ||dz            z              t7          ||            dS )"zs
    parser function for codeml files,
    with values of w,dN,dS etc... dependending of the model
    tested.
    *rM   rN   Nr"   )EvolTreez\(.*\);r   rP   zCodon frequencies under model	codonFreq   z\d\.\d+r!   zNei & Gojobori 1986lnLz.* np: *(\d+)\): +(-\d+\.\d+).*z\1 \2r   npz.* np: *(\d+)\): +(nan).*z-inf..z
\d+\.\.\d+r   zkappa z.*(\d+\.\d+).*r   kappananz +\d+\.\.\d+ +\d+\.\d+ z +( +\d+\.\d+){8}r   )r:   rT   ra   r4   	readlinesre   evolrn   r7   r;   r2   _label_as_paml	enumerater5   statsr   listr   r   rstripr8   r6   r3   
ValueErrorcount_check_paml_labelsr9   _get_values)rZ   r[   	all_linesrn   rF   rG   jlabelss           r   rY   rY      s    #u'1':;;;;VU###f''))I{######h
Irwwy7I7I J J1 MNN""$$$i(( 1 144<<??:;; 	(*EK%BZZ 4 4C
Iy!A#a%?P(Q(QRRSS[)))dV3))))??011 	+(*EK%ek)) {{}}??E"" 		5v@($0 0&)TZZ\\!_&=&=T#&+TZZ\\!_&=&=U## 5 5 5v:($0 0&)TZZ\\!_&=&=T#&+VnnU###	5
 ::dq  Ztcz::FVVUCCC??H%% 	..(-rv7G7<d0D 0D )E )EW%% . . .(-W%%%. ::dq  T__c%:%: 86== 80)QqS/BB O

a(8(=i1o(MNNN%%%c1 1s&   0A4H%%A&JJ3+LL65L6c                 |   |                                 }t          |d                              d          d                   }| j        |                             t	          |d                   t	          |d                   t	          |d                   t	          |d                   t	          |d                   t	          |d                   d	|v r+t	          ||                    d	          dz                      nd
d|v r+t	          ||                    d          dz                      nd
d           d
S )z%
    just to ligther main parser
    r   rs   r!   r"   r$   r'   r*      dNNdS)bLNSr&   r   r   SEdNSEdS)r3   r6   branchesupdater   index)r[   rG   valspaml_ids       r   r   r     s    ::<<D47==&&q)**G	N7""tQx  tQx  tQx  tQx  tQx  tQx  7;t||tDJJ--123337;t||tDJJ--12333	$
 	$
 	 	 	 	 	r   c                    	 t          d |D             d           }n# t          $ r Y dS w xY w|D ]}	 |                     |d                   }t          |j        j                  t          |d                   k    r't          d|z              t          | |||            dS v# t          $ r6 t          |           t          d|z              t          | |||           Y w xY wdS )	z\
     * check paml labels
    Should not be necessary if all codeml is run through ETE.
    c           
      x    g | ]7}t          t          t          |                    d                               8S )rs   )r{   r   r6   r3   )r   xs     r   r   z&_check_paml_labels.<locals>.<listcomp>'  s4    QQQT#sAGGDMM":":;;QQQr   c                     | d         S Nr!   r   )r   s    r   <lambda>z$_check_paml_labels.<locals>.<lambda>(  s
    !A$ r   )keyNr!   r   zPWARNING: labelling does not correspond (bad tree?)!!
         Getting them from z@ERROR: labelling does not correspond!!
       Getting them from )	sortedrA   rf   r6   upnode_idr	   _get_labels_from_pamlprint)treepaml_labelsrZ   r[   	relationsrelnodes          r   r   r   !  sV   
QQ[QQQ .0 0 0		     B B	B11#a&99DDGO$$SV44 35;< = = =%dIvuEEE	 5
  	B 	B 	B#JJJ .067 8 8 8!$	65AAAAA	BB Bs    
--A/B((=C('C(c                 V   ddl m } i }t          |d                                          D ]}t          j        d|          r|t          j        dd|                                                                          \  }}| |z  t          |          |j	        <   
                    dt          |                     |                    d          r n| 
                    dt          t          |           d	z                        |                     d
          D ]X                                rt          t!          fd|                    d         }||j        j	        <   |j        _	        Y ||j                  }	|j        D ]}
|	||
                  |j        |
<   dS )z
    in case problem in labelling... and of course it is not my fault...
    retrieve node_ids from outfile... from relations line.
    This may occur when loading a model that was run outside ETE.
    r   )r^   rz^#[0-9][0-9]*:z#([0-9]+): (.*)z\2 \1r   zSums of codonr!   	postorder)strategyc                 &    | d         j         k    S r   )r   )r   r   s    r   r   z'_get_labels_from_paml.<locals>.<lambda>R  s    !dl(: r   N)r^   r4   rv   r7   searchr8   r=   r3   r6   r   rg   r5   lentraverseis_rootnextr   r   r   )r   r   rZ   r[   r^   old2newrG   namr   r   br   s              @r   r   r   ;  s    Gfc"",,..  9&-- 	76#4i#'::<<1 116 C3JD$'LLGDL!iW666??O,, 	E	 	ic4jj1n!5!5666{33 " "<<>>#8v::::IFFGGJ#* !tEN##H^ 1 1$WQZ0q1 1r   )__doc__
__future__r   r   	six.movesr   r   r   
__author__	__email____licence____version__r7   warningsr	   rK   ra   rk   rY   r   r   r   r   r   r   <module>r      s   P  ' & & & & & % % % % % %                  #
%	 				      OE OE OEd)B )B )BX  &B B BH  $B B B41 1 1 1 1r   