
    瞤do                        d dl mZ d dl mZ d dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dlm
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlZd dlmZ d dlZd dlZd dlZdad d	lmZmZ ej                            ej                            e                    d          adadaej                             d t:                     d dl!Z!d
dl"m#Z# ddl$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 ddl8m9Z9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlAmCZC ddlDmEZE ddlFmGZGmHZHmIZImJZJmKZKmLZLmMZMmNZNmOZO ddlAmPZP 	 d
dlQmRZR n# eS$ r dZRY nw xY wdeRz  ZTdZUd ZVddZWeXdk    r eWejY                   dS dS )    )absolute_import)print_functionN)map)range)StringIO)input)defaultdict)ctimetime   )Citator   )SeqGroupgenerate_runidAANTGLOBALSencode_seqnamepjoinpexist
hascontentclear_tempdircolorifyGENCODEsilent_remove_max_min_std_mean_medianiter_cog_seqs)ConfigError	DataError)Task)app_wrapper)schedule)db)apps)	logindent)	is_fileis_dircheck_configbuild_genetree_workflowbuild_supermatrix_workflowparse_blocklist_workflowsblock_detail	list_apps)seqio)__version__unknownaw  
      --------------------------------------------------------------------------------
                  ETE build (%s) - reproducible phylogenetic workflows

      Citation:

       Huerta-Cepas J, Serra F and Bork P. ETE 3: Reconstruction, analysis and
       visualization of phylogenomic data. Mol Biol Evol (2016)
       doi:10.1093/molbev/msw046

      (Note that a list of the external programs used to complete all necessary
      computations will be shown after workflow execution. Those programs should
      also be cited.)

      --------------------------------------------------------------------------------
      
c           
      (  =>? t          j        d          at          d         }| j        r_t          | j                                                  }|t          | j                                                  z  }t          |          >nt          | j                  >t          j
                            | j                  }t          |d          }t          |          r,t          |          r| j        s| j        st#          d          ddddd	>d
<   t%          t&                    }t)                      }t)          ddg          =d=>fd	} || j        d          } || j        d          }	|	r"t/          |          dk    rt#          d          |	rd}
|	}nd}
|}g }d}| j        d} || j        |
d          }i }|D ]}t3          >          }|||<   |||         d         dd                  }i |d<   i |d<   i }t5          j        |          D ]v\  }\  }}t9          |          }|dk    rYt;          | j        |          }||d         |<   t?          j         |tB          |tE          |                    }||d         |<   |||<   wt          ||          |d<   t%          t2                    |d<   t          |d                   rE| j        r>t                              dd|d         z             tG          j$        |d                    	 t          j%        |d                    n# tL          $ r Y nw xY w|
dk    r*||         d         dd         }d|d         z  ||         d<   |r|
|r|n|i fg| j'        | j(        d|d <   d!| j'        i|d <   t          |d"          5 }t          | j                  5 }|)                    |*                                           ddd           n# 1 swxY w Y   ddd           n# 1 swxY w Y   |+                    d#           |
dk    rdd$l,m-} n|
dk    rdd$l.m-} t^          j0        d%k    rd&nd'}tc          td                     | j3        s2t                              d(d)|z             t?          j4        |           t                              d(d*tk                      z             t                              d(d+t          d,         z             t          j
        6                    |d-          }t          |d.          }t          |d/          }| j7        r$t          j
        8                    | j7                  nt          |d0          }t          |d1          } | j9        r$t          j
        8                    | j9                  nt          |d2          }!|!t          d3<   |t          d4<   |t          d/<   |t          d5<   |t          d6<   | t          d7<   t          |!d8          t          d9<   t          |!d:          t          d;<   | j:        st          |!d<          n| j:        t          d=<   | j        r}t                              d(d>           t          t          d6                   rtG          j$        t          d6                   nd t          t          d/                   rtG          j$        t          d/                   nd t          t          d7                   rtG          j$        t          d7                   nd | j:        stw          t          d=                    tw          t          d;                    tw          t          |d?                     tw          t          |d@                     tw          t          |dA                     tw          t          |dB                     n5| j<        rt                              d(dC           t          t          d/                   rtG          j$        t          d/                   nd t          t          d7                   rtG          j$        t          d7                   nd t          t          d;                   rt          j=        t          d;                   nd | j>        r[t          t          d=                   rA| j:        s:t                              d(dD           t          j=        t          d=                    | j        s"|t          d,         k    rt                              dEdF|z             	 tG          j?        t          t          d,         d2          |!           n&# t          $ r}"tc          |"           Y d}"~"nd}"~"ww xY w	 tG          j?        t          t          d,         dG          t          |dG                     n^# t          $ rQ}"	 tG          jA        t          t          d,         d@          |           n# t          $ r
}"Y d}"~"nd}"~"ww xY wY d}"~"nd}"~"ww xY wt          t          j
        6                    |d@                    r3t          B                    dH           dI|z  }t          jC        |           ||| |!fD ]B}#	 t          j%        |#           # tL          $ r t          B                    dJ|#           Y ?w xY w|
dk    r| jD        st#          dK          |
dk    r,t          j
        E                    | jD                  t          dL<   t)                      t          dM<   | jF        rRt          dM         G                    dN           t          j
        H                    | jF                  dO         t          dP<   | jI        rRt          dM         G                    dQ           t          j
        H                    | jI                  dO         t          dP<   t          jK        t          d9                    t          jL        t          d;                    | jM        rjt)          dR t          | jM                  D                       }$|$+                    d#           t                              d(dSt/          |$                     nd}$|
dk    rt)                      }%t)                      }&t          | jD        | jO                  D ]>\  }'}(|(D ]6\  })}*}+|$|*|$v r*|%G                    |*           |&G                    |)           7?|$.|$|%z
  r(t          dTdU6                    |$|%z
            z            n|%}$t          B                    dVt/          |&          t/          |$          fz             nd}&|$t          dW<   d#},t          t          d=                   st          jQ        t          d=                    d}-| jI        r;t          jS        | dQ|&|$|-          }-|&s!t'          |-T                                          }&| jF        rt          jS        | dN|&|$|-          }-nt          jQ        t          d=                    t          B                    dX           |&t          jU                    }-ndt          jU                    }-|&t)          |-T                                          z
  r-t          dYdU6                    |&t          z
            z            t          B                    dZt/          |-          z             |-W                                t          d[<   |,rxt          t          |d\          d"          5 }|)                    d]6                    t                    d^z   |,z              ddd           n# 1 swxY w Y   t          d_          |
dk    r| jY        r~|r{i }.t%          t(                    ?g }/t          | jY                  D ]}0|0H                    d`          \  }1}2|1Z                                }1|1|$v rada |2H                    dU          D             |.|1<   |.|1         D ]6}3|3?vr|/[                    |3           ?|3         G                    |1           7|$t)          |.          z
  r<|$t)          |.          z
  }4t          B                    dbt/          |4          z             ?fdc|/D             }5t                              dddedf6                    dg |5D                                  t)          dh |5D                       }5|.?ft          di<   n|
dk    r|rt#          dj          | j        t          dk<   t          \                    dl| j        z             | j]        rdm}6n| j^        rdn}6ndo}6t                              d(dp           g }7tk                      }8t5          j        |          D ]&\  }}||dq<    |d||          }9|9s|9d         j_        }:|:|dr<   |t          |:<   |7`                    |9           t          t          j
        6                    |d         ds          dt          )                    d`6                    |:t          d9         dfz   g                     d`6                    |8|:tE          | j^                  t          du         g          };t          t          |d         dv          dt          )                    |;dfz              (t          ||7| jb        |6| j\        | jc                  }<t          jd                     |<st          je        dwd          rt          dw         f                                 | jg        r?t                              d(dx           dyt          d         z  }t          jC        |           t                              d(dz           d{t          d         z  }t          jC        |           d|t          d         z  }t          jC        |           t          d}         h                                 dS t          d~          )z Read and parse all configuration and command line options,
    setup global variables and data, and initialize the master task of
    all workflows. mainbasedirete_build.cfgz{Output directory seems to contain data from a previous run. Use --clearall to restart the analysis or --resume to continue.treesplitterz10%g?F)_app_max_outgroup_size_min_outgroup_support_outgroup_topology_distdefault_tree_splittergenetreesupermatrixc           
      &   g }| s|S | D ]}|ri }d |                     d          D             }t          |          dk    r
|d         }n|d         }|d d         D ]m}|                    d          r|                    dd          }	 t	          t          t          |                     d                              \  }}	|dk     s||	k    rt          n # t          $ r t          d	|z            w xY w|	|d
<   ||d<   |                    d          r|                    dd          }	 t          t          |                     d                    \  }
}|
dk    s|
dk     rt          |dk    s|dk     rt          |
|k    rt          n # t          $ r t          d|z            w xY w|
|d<   ||d<   ]t          d|z            |dk    r1|
                    di           v rd d         |         D             }n:|dk    r1|
                    di           v rd d         |         D             }n|g}t          t	          |                    D ]x\  }}d|v rod |                     d          D             }t          j        | D ],}d                    |          }|                    |           -|                    |           yt#          |           |D ]}|dk    r#                    t'          |                     n(|dk    r"                    t)          |                     t+          |           |vr!t-                     t          d|z            |         d         }|vrt          d|d|d          ||k    rt          d|d|d          |rEt          |          dk    r |                    |d         |fg           t          d|z            |                    |           |S ) Nc                 6    g | ]}|                                 S  strip).0_fs     4lib/python3.11/site-packages/ete3/tools/ete_build.py
<listcomp>z1main.<locals>.parse_workflows.<locals>.<listcomp>   s     AAA"((**AAA    ,r   r   zsize-range: -zSsize filter should consist of two integer numbers (i.e. 50-100). Found [%s] insteadmax_sizemin_sizezseq-sim-range:zpsequence similarity filter should consist of two float numbers between 0 and 1 (i.e. 0-0.95). Found [%s] insteadmin_seq_simmax_seq_simzUnknown workflow filter [%s]rA   genetree_meta_workflowc                 8    g | ]}|                     d           S @lstriprH   xs     rJ   rK   z1main.<locals>.parse_workflows.<locals>.<listcomp>   s"    !g!g!gA!((3--!g!g!grL   rB   supermatrix_meta_workflowc                 8    g | ]}|                     d           S rW   rY   r[   s     rJ   rK   z1main.<locals>.parse_workflows.<locals>.<listcomp>   s"    !j!j!jA!((3--!j!j!jrL   c                 8    g | ]}|                     d           S )rM   )split)rH   elems     rJ   rK   z1main.<locals>.parse_workflows.<locals>.<listcomp>   s"    GGGTZZ__GGGrL   zD[%s] workflow or meta-workflow name is not found in the config file.r<   [z] is not a valid workflow: ?z] is not a valid z	 workflowzPMeta-workflows with multiple threads are not allowed as recursive workflows [%s])r`   len
startswithreplacelistr   int
ValueErrorr"   floatget	enumerate	itertoolsproductjoinappendpopprintupdater-   r.   r/   r0   extend)namestarget_wtypeparse_filtersparsed_workflowswknamewfiltersfieldsfrR   rQ   rS   rT   temp_workflowsindex_wwordscomb
real_wnamewtypeVALID_WORKFLOW_TYPESbase_configs                      rJ   parse_workflowszmain.<locals>.parse_workflows   s7    	$## Q	8 Q	8F "QAAv||C/@/@AAAv;;!###AYFF#BZF#CRC[ Q Q<<66 Q !		- ; ;AL59#c1773<<:P:P5Q5Q 2(#+a<<8h3F3F*4$4 4G $. L L L&1  3H  JK  3K  'L  'L  !LL3;HZ03;HZ00\\*:;; Q !		*:2 > >A	i<?qwws||<T<T 8[#.??kAoo*4$4#.??kAoo*4$4#.#<#<*4$4 $=#- i i i&1  3e  gh  3h  'i  'i  !ii6AH]36AH]33"-.La.O"P"PPz))fH`bd8e8e.e.e!g!gE]9^_e9f!g!g!g..6[__Mhjl=m=m3m3m!j!jE`9abh9i!j!j!j"(&tN';';<< . .	r"99GG#GGGE ) 15 9 : :%(XXd^^
&--j9999"&&u---.!!! % a a:--&&'>r'B'BCCCC!]22&&'A"'E'EFFFB,,,[((";///%&lnp&pqqq#B/ 444%+BBBPUPUPU&VWWWL((%+&&&R^R^R^&_``` )  8~&&!++$++nQ.?-J,KLLLL%&x  {A  'A  B  B  B ''7777s   ACC82AFF.r   zbA single genetree workflow must be specified when used in combination with super-matrix workflows.NT)rw   _appsetapp	threadingzbuilt-in_outpath	_nodeinfo   zCleaning result directory %s_alg_concatenatorz@%sr   	_workflow)wf_type	workflowsnt_switch_thr	max_iters_nprr   wrO   )pipelinel        z64 32   z'Testing x86-%s portable applications...z"Starting ETE-build execution at %szOutput directory %s
output_dirgallerysge_jobstmptasksr   r'   db_dirsge_dirgallery_dir	tasks_dir	input_dirznpr.db
nprdb_filezdata.dbdatadb_filezseq.db
seqdb_filez Erasing all existing npr data...zetebuild_data.tarzetebuild_data.tar.gzzetebuild.logzetebuild.log.gzz1Erasing precomputed data (reusing task directory)z%Erasing existing sequence database...   z9Copying previous output files to scratch directory: %s...ztasks/z?Compressed data found. Extracting content to start execution...z\cd %s && gunzip -f etebuild_data.tar.gz && tar -xf etebuild_data.tar && rm etebuild_data.tarzUsing existing dir: %szYSpecies tree workflow requires a list of COGS to be supplied through the --cogs argument.	cogs_fileseqtypesntrN   	inputnameaac                 6    g | ]}|                                 S rE   rF   )rH   lines     rJ   rK   zmain.<locals>.<listcomp>  s     IIItdjjllIIIrL   zEnabling %d speciesz@The following target_species could not be found in COGs file: %srM   z3COG file restriction: %d sequences from %s species target_speciesz)Reusing sequences from existing database!zOThe following sequence names in COGs file are not found in current database: %sz%d target sequencestarget_sequencesz	error.log z

zIErrors were found while loading data. Please check error file for details	c                 Z    g | ](}|                                                                 )S rE   )rG   lowerr[   s     rJ   rK   zmain.<locals>.<listcomp>7  s*    LLLAaggiioo//LLLrL   z%%d species not found in lineages filec                 p    g | ]2}t          |                   d k    |t          |                   f3S )r   )rd   )rH   linlin2sps     rJ   rK   zmain.<locals>.<listcomp>B  sE    dddCPSTZ[^T_P`P`bcPcPcc&+../PcPcPcrL      z)Available levels for NPR optimization:
%sr6   c                     g | ]}d |z  S )z% 30s (%d spcs)rE   r[   s     rJ   rK   zmain.<locals>.<listcomp>C  s     LwLwLwefM^_`M`LwLwLwrL   c                     g | ]
}|d          S )r   rE   )rH   lvs     rJ   rK   zmain.<locals>.<listcomp>D  s    999bBqE999rL   lineagesz_The use of target_levels requires a species lineage file provided through the --lineages option
_max_coreszEnabling %d CPU cores)NF)insituT)r   FzETE build starts now!_name	_configidrunidacmdlinecommand_lines_background_schedulerz Compressing intermediate data...zcd %s && tar --remove-files -cf etebuild_data.tar tasks/ && gzip -f etebuild_data.tar; if [ -e etebuild.log ]; then gzip -f etebuild.log; fi;zDeleting temporal data...zcd %s && rm -rf tmp/zcd %s && rm -rf input/citatorzErrors found in some tasks)F)ilogging	getLoggerlogr   custom_configopenr   	readlinesr,   ospathbasenamer   r   r   clearallresumer"   r	   rg   setworkflowsupermatrix_workflowrd   npr_workflowsdictsix	iteritemsrh   minmaxcoresr(   get_callAPPSPATHstrshutilrmtreemakedirsOSErrorr   r   writereaddiscardete_build_lib.workflow.genetreer   "ete_build_lib.workflow.supermatrixsysmaxsizerr   __DESCRIPTION__nochecks	test_appsr
   ro   r   realpathr   seqdbr   	softclearremove	clearseqscopytreeIOErrorcopywarningsystemr   abspathnt_seed_fileaddr`   aa_seed_filer'   
init_nprdbinit_datadbspfiler!   spname_delimiterr#   
init_seqdbr3   load_sequenceskeysget_seq_name_dictdb_seqsvalues	argumentslineages_filerG   rp   debug
no_executemonitorthreadidrt   r&   schedule_timenoimgcloserk   	terminatecompressshow)@argsbase_dirconcat_config	clearnamelocal_conf_fileworkflow_typesTARGET_CLADESr   genetree_workflowssupermatrix_workflowsWORKFLOW_TYPEmaster_workflowsr   use_npr
run2configry   configappsetapps_to_testkappsrccorescmdconcatenatorOUTPUTINPUTr   archr   r   tmp_dirr   r   r   edirnamer   observed_speciestarget_seqs
cog_numberseq_cogsseqnamespcodeseqcodeERRORseqname2seqidsp2linall_sorted_levelsr   splineager   missingavail_levels	executionpending_tasks
start_time	new_tasks	thread_idcmd_infothread_errorsr   r   r   s@                                                                @@@rJ   r8   r8   {   s    
F
#
#Cy!H  5T-..88::d011;;==="=11"4#344   !122IHo66Oh fo&& 	f= f f! #e f f f  $"%$)	,+ ,+K'( !&&NEEM
M:;;W  W  W  W  W  W  W r )
CC+OD,E}UU  @%7!8!81!<!<~  .%0"- MG%'(:MY]^^^ J" @ @k""#
6vy1!""56 u {"%-"7"7 	& 	&AJJE##DM511).{#A&mAx3u::FF#&ua "%Q #8V44z)$//{&$%% 	.$- 	.GGB6z8JJKKKM&,---	Kz*++++ 	 	 	D	 M))!&>*=>qrrBL058J18M0MF< -  	 ).;O]]62,!%!3!^ F6NN0  !3F6NN 
os	#	# 'v$"## 	'uLL&&&	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	' 	'' ' ' ' ' ' ' ' ' ' ' ' ' ' ' "
""=======	-	'	'@@@@@@ K%''55TD	/ = %=DEEE|$$$GGB4uww?@@@GGB%(=>??? ',,x33KHj))GHe$$G48Na  000xY`IaIaIh((I.2kURWdk***hPT@U@UFGH GIGEN(GM$GK$GK"6844GL#FI66GM<@JVU68444DJGL } -6777/5gk6J/K/KUgk*+++QU)/)?)?Igen%%%T/5gk6J/K/KUgk*+++QUz 	1',/000gm,---eH&9::;;;eH&<==>>>eHn55666eH&7889999 > 	ZGGBKLLL-3GEN-C-CMFM'%.)))39'+:N3O3OYFM'+.///UY178N1O1OYBIgm,---UY> 	-fW\%:;; 	-DJ 	-GGB?@@@Igl+,,,= X)>>>OQYYZZZ	OE',"7>>GGGG 	 	 	!HHHDDDD		OE',"7BBE(T\D]D]^^^^ 	 	 	E',"79OPPRZ[[[[   	 bgll8$:;;<< UVVVloww
	# Y	6: ; ;	;K     	; 	; 	;KK0':::::	; %%dn% ' ( ( 	( 
-	'	'!wt~>>%%GJ D
%%%!w}}T->??C D
%%%!w}}T->??C M','(((N7=)*** { IItDK7H7HIIIJJr""")3~+>+>???? %%%%ee$1$.$BW$X$X 	- 	- J,4 - -(!)V~-E-E$((000OOG,,,-
 % 00 Q behememn|  ~N  oN  fO  fO  !P  Q  Q  QQ .NICP[L\L\^abp^q^qKrrssss .G E','(( F
gl+,,, 	9!0t[.ZghhM 9"=#5#5#7#788 	i!0t[.ZghhM 	gl+,,,?@@@022MM022MS!3!3!5!5666 F !I$'HH[7-B$C$C!E F F F KK%s='9'99:::"/"6"6"8"8G 9%+..44 	?LL),,v5=>>>	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 	? 8 9 9 	9 %%$*<%%S!!+,, 	( 	(D**T**KBB^##LLs9K9KLLLr
!": ( (C&(()005553KOOB''''CKK' 	O$s6{{2GKK?WMNNN eddd;Lddd@$))LwLwjvLwLwLwBxBxyyy99L999::%v.
 
-	'	'M	'{||| !MGLII%t}4555  *!		 < 	*'II)I GGB'((( MJ-
33 S S wHT6622	 	aL)	'{#	Y''' 	RW\\&,g66<<BB499iY`amYnosYsMtCuCuvvv99j)S5F5FPYHZ[\\U6*%77==CCHTMRRRRX}d6H&
DJ@ @MHJJJ 6;.55 	9+,66888= 	GGB:;;; bi !CIcNNN/000$gi&88
	#&	(::
	#	!!!!!4555s   L11
L>=L>.P(O7+P7O;	;P>O;	?PPP.e5 5
f?ff<g 
h4$.hh/
h'h/"h''h//h4 j55%kk4~~~c           
          |r|a t          j                            d          }t	          |           dk    rt          t                     st          t          dd          t          j	                   t          t          dd          t          j	                   t          t          dd	          t          j	                   t          t          d
d          t          j	                   t          t          dd	          t          j	                   t                       t	          |           dk    rt          t          d          }| d         dk    rt          t                     st          t          dd          t          j	                   t          t          dd          t          j	                   t          t          dd	          t          j	                   t          t          d
d          t          j	                   t          t          dd	          t          j	                   t                       t          j        d           	 t          t          t           d                                                    }n# t          $ r d}Y nw xY wt          dt           z             t          d|z             i }t           j        D ]#}t!          j        |t           dd          }|||<   $t!          j        |           t          j        d           n| d         dv r| d         dk    r)t          t          dd          t          j	                   t)          |          }	 | d         }	n# t*          $ r d }	Y nw xY wt-          ||	           t          j        d           n&| d         dk    rJt)          |          }t/          |t1          | dd                               t          j        d           n| d         dk    rrt)          |          }	 | d         }
n3# t*          $ r& t          d           t          j        d           Y nw xY wt3          |
|           t          j        d           nR| d         dk    r~t	          |           dk    r(t)          |          }t3          | d         |d           n.t          t          |                                                     t          j        d           n| d         d k    rt          d!| d                    t          | d                   r%t)          | d                   }t          d"           n#t          d#           t          j        d$           t          j        d           n4| d         d%k    r(t          t6                     t          j        d           t9          j        t<          t>          z   t8          j         &          }|!                    d'          }|"                    d(d)d*+           |"                    d,d-d.tF          d/0           |"                    d1d2tF          t          d3z   d45           |"                    d6d7tH          d80           |"                    d9d:d;d<d=>           |"                    d?d@dd<dA>           |"                    dBdCtF          dD0           |"                    dEdFtF          dG0           |"                    dHdIdJdKL           |"                    dMdNtH          dOdPQ           |"                    dRdSdTdUL           |"                    dVdWdJdXL           |"                    dYdZdJd[L           |%                                }|"                    d\d]dJd^L           |"                    d_d`dJdaL           |"                    dbdctH          dd0           |"                    dedftF          dg0           |"                    dhditF          dj0           |"                    dkdltH          dmdn5           |"                    dodptF          dq0           |!                    dr          }|"                    dsdtduddvdw>           |"                    dxdydtL          dzd{|           |"                    d}d~dtN          dd|           |"                    ddtH          dd5           |!                    d          }|"                    dddtH          d;d           |"                    ddtP          d0           |"                    ddtP          d0           |"                    ddtP          d0           |"                    ddJd           |"                    ddJd           |"                    ddJd           |"                    ddtH          d0           |"                    ddtN          dd5           |!                    d          }|"                    dddtN          dd5           |"                    dddtL          dd5           |"                    ddtL          dd5           |%                                }|"                    dddJdL           |"                    dddJdL           |"                    dddJdL           |"                    dddJdL           |"                    dddJdL           |"                    dddJdL           |"                    dddJd¬L           |!                    dæ          }|"                    dddtN          g dƢdǬȦ           |"                    dd)ddˬ̦           |)                    |           a*tT          j+        rtT          j+        a 	 t          t          t           d                                                    }n# t          $ r d}Y nw xY wt          dt           z             t          d|z             t          t                     sZt          t          dt           z  d          t          j	                   t          t          dd          t          j	                   dtT          _,        tT          j-        s	 ddl.m/}  |            0                    dҦ           na#  t          dt          t          j1        2                    ddզ          d          z             t          d֦           tg          dצ          xY wtT          j4        s!tT          j5        s|6                    dئ           t          j        7                    tT          j8                  }t          j        9                    |          \  }}|stu          d٦          t          j        7                    tT          j8                  tv          d<   tT          j<        rot          dt          j	                   t          j        7                    tT          j<                  }t{          j>        d|ݦ          }|tv          d<   |tv          d<   ntv          d         tv          d<   tT          j?        tv          d<   tT          j@        tv          d<   tT          jA        tv          d<   tT          jB        dz  tv          d<   tT          jC        tv          d<   dD                    |           tv          d<   t          t                    tv          d<   t1                      tv          d<   t1                      tv          d<   t1                      tv          d<   tT          jG        tv          dl<   d;tv          d<   t                      tv          d<   d tv          d<   d tv          df<   tv          d         I                    d           t          tv          d                   st          jJ        tv          d                    d }t          jK        t          jL        |           t          t          tT                     d S )Nz~/.etetoolkit/r   z)
WARNING: external applications not foundyellow)filez!Install using conda (recomended):lgreenz* conda install -c etetoolkit ete_toolchainwhitezor manually compile from:z, https://github.com/etetoolkit/ete_toolchainr:   checkr   r4   r5   zCurrent Toolchain path: %s zCurrent Toolchain version: %sz/tmp1)r   wlrF  zTWARNING: 'wl' is obsolete and will be removed in the future, use 'workflows' insteadoranger   r(   r  z!Expected a block name, found nonedumpF)colorvalidatezValidating configuration file zEverything okzFile does not existrN   version)descriptionformatter_classz==== Input Options ====z3[check | workflows | apps | show | dump | validate]rc   a  Utility commands:
check: check that external applications are executable.
wl: show a list of available workflows.
show [name]: show the configuration parameters of a given workflow or application config block.
dump [name]: dump the configuration parameters of the specified block (allows to modify predefined config).
validate [configfile]: Validate a custom configuration file.
version: Show current version.
)nargshelpz-cz--custom-configr   zCustom configuration file.)desttyperO  z--base-configr   z/ete_build.cfgzBase configuration file.)rP  rQ  defaultrO  z--tools-dir	tools_dirz0Custom path where external software is avaiable.z-wr   T+zmOne or more gene-tree workflow names. All the specified workflows will be executed using the same input data.)rP  requiredrN  rO  z-mr   zpOne or more super-matrix workflow names. All the specified workflows will be executed using the same input data.z-ar   z3Initial multi sequence file with protein sequences.z-nr   z5Initial multi sequence file with nucleotide sequencesz	--dealigndealign
store_trueziwhen used, gaps in the orginal fasta file will be removed, thus allowing to use alignment files as input.)rP  actionrO  z--seq-name-parserseq_name_parsera  A Perl regular expression containing a matching group, which is used to parse sequence names from the input files. Use this option to customize the names that should be shown in the output files. The matching group (the two parentheses) in the provided regular expression will be assumed as sequence name. By default, all  characthers until the first blank space or tab delimiter are  used as the sequence names.z	^([^\s]+))rP  rQ  rO  rR  z--no-seq-rename
seq_renamestore_falsezVIf used, sequence names will NOT be internally translated to 10-character-identifiers.z--no-seq-checksno_seq_checksz>Skip consistency sequence checks for not allowed symbols, etc.z--no-seq-correctno_seq_correctz]Skip sequence compatibility changes: i.e. U, J and O symbols are converted into X by default.z--ignore-dup-seqnamesignore_dup_seqnameszbIf duplicated sequence names exist in the input fasta file, a single random instance will be used.z--rename-dup-seqnamesrename_dup_seqnameszWIf duplicated sequence names exist in the input fasta file, duplicates will be renamed.z--seqdbr   z$Uses a custom sequence database filez--cogsr   zZA file defining clusters of orthologous groups. One per line. Tab delimited sequence ids. z
--lineagesr  a<  EXPERIMENTAL:A file containing the (sorted) lineage track of each species. It enables NPR algorithm to fix what taxonomic levels should be optimized.Note that linage tracks must consist in a comma separated list of taxonomic levels sorted from deeper to swallower clades (i.e. 9606 [TAB] Eukaryotes,Mammals,Primates)z--spname-delimiterr   _zspname_delimiter is used to split the name of sequences into species code and sequence identifier (i.e. HUMAN_p53 = HUMAN, p53). Note that species name must always precede seq.identifier.z--spfiler   zIf specified, only the sequences and ortholog pairs matching the group of species in this file (one species code per line) will be used. z==== NPR options ====z-rz--recursiver   *zEXPERIMENTAL:Enables recursive NPR capabilities (Nested Phylogenetic Reconstruction) and specifies custom workflows and filters for each NPR iteration.z--nt-switch-thresholdr   gffffff?z`Sequence similarity at which nucleotide based alignments should be used instead of amino-acids. )rP  rU  rQ  rR  rO  z--max-itersr   iz<EXPERIMENTAL:Set a maximum number of NPR iterations allowed.z--first-split-outgroupfirst_splitmidpointzEXPERIMENTAL:When used, it overrides first_split option in any tree merger config block in the config file. Default: 'midpoint' z==== Output Options ====z-oz--outdiroutdirzOutput directory for results.)rP  rQ  rU  rO  z--scratch-dirscratch_dirzyIf provided, ete-build will run on the scratch folder and all files will be transferred to the output dir when finished. z--db-dirr   z.Alternative location of the database directoryz--tasks-dirr   zAOutput directory for the executed processes (intermediate files).z
--compressz<Compress all intermediate files when a workflow is finished.)rX  rO  z	--logfilez_Log messages will be saved into a file named 'etebuild.log' a the root of the output directory.z--noimgz>Tree images will not be generated when a workflow is finished.z--emailemailzCEXPERIMENTAL:Send an email when errors occur or a workflow is done.z--email-report-timeemail_report_timezoEXPERIMENTAL:How often (in minutes) an email reporting the status of the execution should be sent. 0=No reportsz ==== Execution Mode Options ====z-Cz--cpur   zMaximum number of CPU cores available in the execution host. If higher than 1, tasks with multi-threading capabilities will enabled. Note that this number will work as a hard limit for all applications,regardless of their specific configuration.z-tz--schedule-timer  zBHow often (in secs) tasks should be checked for available results.z--launch-timelaunch_time   z?How often (in secs) queued jobs should be checked for launchingz--noexecr  zPrevents launching any external application. Tasks will be processed and intermediate steps will run, but no real computation will be performed.z	--monitorr  a  Monitor mode: pipeline jobs will be detached from the main process. This means that when npr execution is interrupted, all currently running jobs will keep running. Use this option if you want to stop and recover an execution thread or if jobs are expected to be executed remotely.z--resumer   z<If output directory exists, reuse data from it if possible. z
--clearallr   zPIf output directory exists, erase all previous data and start a clean execution.z--softclearr   zlClear all precomputed data (data.db), but keeps task raw data in the directory, so they can be re-processed.z--clear-seqdbr   z9Reload sequences deleting previous database if necessary.z
--nochecksr   zJSkip basic checks (i.e. tools available) everytime the application starts.z#==== Program Interface Options ====z-v	verbosity)r   r   r   ri     z/Verbosity level: 0=very quiet, 4=very  verbose.)rP  rR  rQ  choicesrO  z--debugallzTStart debugging A taskid can be provided, so debugging will start from such task on.)rN  constrO  zToolchain path: %s zToolchain version: %sz=
WARNING: external applications directory are not found at %sz9Use "ete build install_tools" to install or upgrade tools)Treez/tmp/etenpr_img_test.pngzX11 DISPLAY = %sDISPLAYznot detected!z7(You can use --noimg to disable graphical capabilities)zimg generation not supportedz5At least one input file argument (-a, -n) is requiredzInvalid outdirr   z!Creating temporary scratch dir...npr_tmp)prefixdirr9   first_split_outgroup<   r   r   
threadinfor   r   r   color_shellr   r   ETEc                 x    t          j        dd           rt           d                                          t          )Nr   )r   rk   r  KeyboardInterrupt)_signal_frames     rJ   raise_control_cz_main.<locals>.raise_control_cG  s5    ;.55 	9+,66888rL   )Or   r   r   
expanduserrd   r   rr   r   r   stderrr   BASEPATHexitr   readliner   r(   builtin_appsr   r   r,   
IndexErrorr0   r2   r   r1   r   r4   argparseArgumentParserr   __EXAMPLES__RawDescriptionHelpFormatteradd_argument_groupadd_argumentr*   r   add_mutually_exclusive_grouprj   rh   r+   
parse_argsr  rS  	enable_uir	  rO   ro  renderenvironrk   r"   r   r   errorr   rd  r`   ri   r   re  tempfilemkdtemprb  rf  rj  rg  rh  ro   r	   r   r   r   r   r   signalSIGTERMr%   r8   )r  builtin_apps_path
ETEHOMEDIR_config_pathtoolchain_versionr  r  r!  r   r   blockparserinput_groupdup_names_group	npr_groupoutput_group
exec_groupexec_type_groupui_groupro  rd  	final_dirrunpathbase_scratch_dirre  r}  s                             rJ   _mainr    sX     %$##$455J
9~~h 	(GRRY\Ycdddd(>IIPSPZ[[[[(GQQX[Xbcccc(6AA
SSSS(I7SSZ]ZdeeeeGGG 9~~X77Q<7""(## hKXVV]`]ghhhhhBHMMTWT^____hKWUU\_\fggggh:HEECJWWWWhMwWW^a^hiiii.$(x)G)G$H$H$Q$Q$S$S!! . . .$-!!!. /9:::13DDEEE F&    mAx==q		N6"""HQKKKKq\000|t##huw  A  A  HK  HR  S  S  S  S&|44K#A,   ;000HQKKKKq\V##&|44Kk3y}#5#5666HQKKKKq\V##&|44K!!   9::: ,,,HQKKKKq\V##9~~!!*<88Yq\;eDDDDDd<((--//000HQKKKKq\Z''2IaLAAAil## *9Q<88o&&&&+,,,HQKKKKq\Y&&+HQKKK$<1O5=5Y[ [ [F ++,EFFKR#&$F  	& 	& 	& T#4?")">  @ @ @ _=")8<L3L"<  > > > ]"%"T  V V V T
&*#& #R  S S S
 T(>&+#& #U  V V V
 T")#3  4 4 4 T")#5  6 6 6 [y$0#[  \ \ \
 07H"%$B &1  	2 	2 	2 .\$1#,  - - - ._#/!a  c c c /6F#/ "A  B B B ">>@@O  !8?T*6(] ! _ _ _
   !8?T*6(R ! T T T
 YW"%"H  J J J XK")#K  L L L
 \")#R  
 
 
 18J"%s#[  \ \ \ Zh")#K  L L L ))*ABBI4_$)!$!a  b b b
 2$) %%)!7	  8 8 8 ={$) ##+ ^	  ` ` `
 3- ##-"F  H H H ,,-GHHLdJX#&#F  H H H oM#) $c  d d d jx#)#W  Y Y Y m+#)#j  l l l l<$9  : : : k, $E  F F F i#c  e e e ig#&#h  j j j 3:M#&! $U  V V V **+MNNJD'
$% -J  K K K D"3/!&!i  k k k O-!&!f  h h h !==??O  ,(4(Z ! \ \ \ Ki#/"M     JX#/!_  a a a Lz#/!s  u u u
 M#/ "P  Q Q Q O+#/!\  ^ ^ ^ Lz#/!m  o o o
 (()NOOH $["#"KKK &  ' ' ' )3 % E  F F F Y''D~ ">& x!?!?@@IIKK & & &%&	

)***	
!#4
4555( phWYaackllsvs}~~~~hRT\]]dgdnooooDN: >	>DFFMM45555	>$hrz~~i/Y/Y[c&d&ddeeeKLLL<=== NT%6 NLMMMW__T[))Fv..Iw +)***GOODK88GL 31
CCCC7??4+;<<&i=MNNN!,(	$\2	 '+&6G"#zGG>GK#'#9B#>G !-GM),,GI'--GL%%GJ #G"%%%G"&"7G!GM GI GJGKI5!!!')$%% (
GI&'''      M&./222 dsU    4J J$#J$;N NN.P7 7-Q'&Q'4o= =pp?#s# #Au__main__)N)Z
__future__r   r   rerm   errnosix.moves.builtinsr   	six.movesr   r   r   r   r   r   r   r  collectionsr	   filecmpr   r  r   r   r
   r   r`   r   __file__r  r   r  insertr  citationr   ete_build_lib.utilsr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   ete_build_lib.errorsr"   r#   ete_build_lib.master_taskr$   ete_build_lib.interfacer%   ete_build_lib.schedulerr&   ete_build_libr'   r(   ete_build_lib.loggerr)   ete_build_lib.configcheckr*   r+   r,   r-   r.   r/   r0   r1   r2   r3   rK  r4   ImportErrorr   r  r8   r  __name__argvrE   rL   rJ   <module>r     s  R ' & & & & & % % % % % % 				          



                         



 				   # # # # # #   
         7==))(3344Q7 8          ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;
 9 8 8 8 8 8 8 8 + + + + + + 0 0 0 0 0 0 - - - - - -             + + + + + +B B B B B B B B B B B B B B B B B B B B B B
 !          %%%%%%%   [[[
  $N6 N6 N6bB B B BH z	E#(OOOOO s   >E EE