U
    L_                     @   sZ  d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	 ddl
Z
ddlZddlZddlZddlZddlmZ ddlZddlZddlZzejefZW n ek
r   ejZY nX eeZG dd	 d	eZG d
d deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZ G dd deZ!G dd deZ"dS )z,
Created on Sep 19, 2012

@author: smirarab
    )JobJobError)
get_logger)abstractmethod)PopenN)PhylogeneticTreec                   @   s~   e Zd ZdZdddZdd ZeeZdd ZeeZ	d	d
 Z
dd Zedd Zedd Zedd Zdd Zdd ZdS )ExternalSeppJobz
    All Sepp jobs that run external programs
    should extend this abstract class.
    This class handles executing external jobs, error handling, and more.
    Nc                 K   sx   t |  || _d | _t|| _d | _d  | _| _d | _	d| _
d| _t | _|rX|| _ntj | jj| _d| _d S )NF)r   __init__job_type_iddict_kwargs_process
stdoutdata
stderrdata	stdindataZignore_errorfake_run
attributespathseppconfigoptions__getattribute__results_on_temp)selfZjobtyper   kwargs r   (lib/python3.8/site-packages/sepp/jobs.pyr	   #   s     


zExternalSeppJob.__init__c                 C   s   | j S N)r   r   r   r   r   get_id>   s    zExternalSeppJob.get_idc                 C   s   | j S r   )r   r   r   r   r   get_processB   s    zExternalSeppJob.get_processc              
   C   s  | j r| jS | jr|  S zbtd| j|  f  | j dksHtdd| j	kr`t
j| j	d< n&t| j	d trt| j	d d| j	d< d| j	krt
j| j	d< n&t| j	d trt| j	d d| j	d< | jdk	rt
j| j	d< |  d	 d
d	ks"tj|  d	 s"td| j|  d	 f tdddd |  D  t|  fddi| j	| _| jj| _| jdk	r| jj| jd\| _| _n| j \| _| _| jjd	krtd| j|  f  td| j|  | jj| jrt | jdkrd| jd	d t | jf n| jf  nXtd| j|  | jj| j| ! f  t"ddd|  | ! rl| ! ndg|  W S  t#k
r   t$%   Y nX dS )z Runs the external job, and handles errors, piping, checkpointing,
        etc. get_invocation() needs to be implemented in child classes.
        zStarting %s Job with input: %sFzJob is already run.stdoutwstderrNstdinr   /z path for %s  does not exist (%s)zInvocation of %s c                 s   s"   | ]}|d k	rt |ndV  qd S )Nz
?NoneType?)str).0xr   r   r   	<genexpr>k   s   z&ExternalSeppJob.run.<locals>.<genexpr>Zuniversal_newlinesT)inputzFinished %s Job with input: %szAFinished %s Job with input: %s with:
 return code: %s
 output: %sd   z%s ... (continued: %d ) ...zKFinished %s Job with input: %s with:
 return code: %s
 output: %s
 error:%s
zThe following execution failed:zNo error messages available)&Z
result_setresultr   read_results_LOGdebugr
   characterize_inputAssertionErrorr   
subprocessPIPE
isinstancer(   openr   get_invocationcountosr   existsjoinr   r   pidr   Zcommunicater   r   process
returncodeinfolenread_stderrr   	Exception	traceback	print_excr   r   r   r   runF   s     






 
 
zExternalSeppJob.runc                 C   sH   | j dk	r| j S d| jkr@t| jd tr@t| jd jd S dS dS )zL
        Used for reading standard error when an error is detected.
        Nr$   r)r   r   r7   	filetypesr8   namereadr   r   r   r   rC      s    

zExternalSeppJob.read_stderrc                 C   s   t ddS )z
        The method needs to return a list with the first argument giving
        the executable, and the rest giving the arguments.
        z2get_invocation should be implemented by subclassesNNotImplementedErrorr   r   r   r   r9      s    zExternalSeppJob.get_invocationc                 C   s   dS )z
        Need to implement this method to help with automatic logging.
        Output a string characterizing the input to this job
         r   r   r   r   r   r3      s    z"ExternalSeppJob.characterize_inputc                 C   s   t ddS )a  
        This method should read the results of an external execution, and turn
        the results into a python object (could be simply the path to an output
        file) and return that python object. This is the result that will be
        ultimately pickled and sent to the main process, and will be accessible
        to the other jobs, joins, etc. Better not to pass around large files.
        This results should be picklable.
        z0read_results should be implemented by subclassesNrL   r   r   r   r   r0      s    
zExternalSeppJob.read_resultsc                 C   s
   | j | S )B each job maintains a dictionary of free form attributes.
        r   )r   keyr   r   r   get_attribute   s    zExternalSeppJob.get_attributec                 C   s   || j |< dS )rO   NrP   )r   rQ   valr   r   r   set_attribute   s    zExternalSeppJob.set_attribute)N)__name__
__module____qualname____doc__r	   r    propertyidr!   r?   rG   rC   r   r9   r3   r0   rR   rT   r   r   r   r   r      s    
[


r   c                   @   sD   e Zd ZdZdd ZdddZdd	d
Zdd Zdd Zdd Z	dS )HMMBuildJobz1
    The Job class that executes a HMM build
    c                 K   s@   d| _ tj| | j f| d | _d | _d | _d | _d| _d| _d S )NZhmmbuildTrN   )	r
   r   r	   infileinformatoutfilemoleculesymfracr   r   r   r   r   r   r	      s    zHMMBuildJob.__init__Tfastadnac                 K   s4   || _ || _|| _|| _|| _d|kr0|d | _dS )
        Use this to setup the job if you already have input file written to a
        file. Use setup_for_subproblem when possible.
        r   N)r\   r]   r^   r_   r`   r   )r   r\   r^   r`   r]   r_   r   r   r   r   setup   s    zHMMBuildJob.setupc                 K   s   t |tjjstt |jtjjs&t|| _tj	d|d| _
|| j
 d| _|| _tj	d|| _|| _d|kr~|d | _dS )z
        Automatically sets up a job given a subproblem object. It outputs the
        right alignment subset to a temporary file.
        zhmmbuild.input..fastarb   zhmmbuild.model.r   N)r7   r   problemSeppProblemr4   ZsubalignmentZReadonlySubalignmentr`   filemgrtempfile_for_subproblemr\   Z)write_subalignment_without_allgap_columnsr]   r^   r_   r   )r   
subproblemr`   r_   r   r   r   r   setup_for_subproblem   s*        z HMMBuildJob.setup_for_subproblemc                 C   s   | j ddddd| j g}| jdkr0|ddg | jd	krJ|| j  | jd
krb|ddg || j| jg t	
dd|  |S )Nz--erez0.59--cpu1--%sTz	--symfracz0.0rN   rb   z
--informatZafazRunning HMMBUILD command: %sr'   )r   r_   r`   extendr   splitr]   r^   r\   r1   r2   r=   r   invocr   r   r   r9     s    


zHMMBuildJob.get_invocationc                 C   s   | j S r   )r\   r   r   r   r   r3     s    zHMMBuildJob.characterize_inputc                 C   s2   t j| jstt | jtj dks,t| jS )z
        Simply make sure the file exists and is not empty. Don't need to load
        the file into memory or anything else. Just return the location of the
        file.
        r   )r;   r   r<   r^   r4   statST_SIZEr   r   r   r   r0     s    zHMMBuildJob.read_resultsN)Trb   rc   )Trc   
rU   rV   rW   rX   r	   re   rl   r9   r3   r0   r   r   r   r   r[      s   
  
  
r[   c                   @   s@   e Zd Zdd ZdddZdd	d
Zdd Zdd Zdd ZdS )HMMAlignJobc                 K   s@   d| _ tj| | j f| d | _d | _d | _d | _d | _d | _d S )NZhmmalign)	r
   r   r	   hmmmodel	fragmentsr^   base_alignmenttrimr_   ra   r   r   r   r	   '  s    zHMMAlignJob.__init__NTrc   c                 K   s.   || _ || _|| _|| _|| _|| _|| _dS )zy
        Setup job parameters when those are externally decided.
        Use setup_for_subproblem when possible.
        N)rx   ry   r^   rz   r{   r_   r   )r   rx   ry   output_filerz   r{   r_   r   r   r   r   re   1  s    zHMMAlignJob.setupFc                 K   sJ   t |tjjsttjd|| _tjd|d| _|| _	|| _
|| _dS )zAutomatically sets up a job given a subproblem object. Note that
        hmmmodel is not set and fragments is just a filename that needs to be
        created later. base_alignment is not set either.
        zhmmalign.results.zhmmalign.frag.rf   N)r7   r   rg   rh   r4   ri   rj   r^   ry   r{   r_   r   )r   rk   r{   r_   r   r   r   r   partial_setup_for_subproblem?  s       z(HMMAlignJob.partial_setup_for_subproblemc                 C   s\   | j d| j d| jg}| jr(|dg d| jkrF|| jd   || j| jg |S )Nro   -oz--trimuser_options)	r   r_   r^   r{   rp   r   rq   rx   ry   rr   r   r   r   r9   P  s     
zHMMAlignJob.get_invocationc                 C   s   d| j | j| j| jf S )Nz2model:%s, fragments:%s, trim:%s, base_alignment:%s)rx   ry   r{   rz   r   r   r   r   r3   ]  s       zHMMAlignJob.characterize_inputc                 C   s@   | j r
dS tj| jr8t| jtj dks2t| jS dS dS )
        Since the output file can be huge, we don't want to read it here,
        because it will need to get pickled and unpickled. Instead, we just
        send back the file name, and will let the caller figure out what to do
        with it.
        Nr   )r   r;   r   r<   r^   rt   ru   r4   r   r   r   r   r0   a  s    zHMMAlignJob.read_results)NTrc   )Frc   	rU   rV   rW   r	   re   r}   r9   r3   r0   r   r   r   r   rw   &  s   
    
   
rw   c                   @   sH   e Zd Zdd ZdddZdddZd	d
 Zdd Zdd Zdd Z	dS )HMMSearchJobc                 K   s   d| _ tj| | j f| d | _d | _d | _d | _d | _tt	j
 jdrft	j
 jj  dk| _nd| _| j | _tdt| jt| jf  d S )N	hmmsearchpipedtrueTz+HmmSearch: Piped?: %s and keep on temp?: %s)r
   r   r	   rx   ry   r^   elimfiltershasattrr   r   r   r   r   striplowerpiper   r1   r2   r(   ra   r   r   r   r	   s  s"    
 zHMMSearchJob.__init__NTc                 K   s(   || _ || _|| _|| _|| _|| _d S r   )rx   ry   r^   r   r   r   )r   rx   ry   r|   r   r   r   r   r   r   re     s    zHMMSearchJob.setupc                 K   sJ   t |tjjsttjd|| _|| _| js4d| _	|| _
|| _|| _dS )z
        Automatically sets up a job given a subproblem object.
        Note that hmmmodel is not setup and needs to be set separately.
        zhmmsearch.results.TN)r7   r   rg   rh   r4   ri   rj   r^   ry   r   r   r   r   )r   Zfragments_filerk   r   r   r   r   r   r   r}     s     z)HMMSearchJob.partial_setup_for_subproblemc                 C   s   | j dddg}| js$|d| jg | jd k	rB|dt| jg | jsT|dg d| jkrr|| jd   || j	| j
g |S )Nz--noalirm   rn   r~   z-Ez--maxr   )r   r   rp   r^   r   r(   r   r   rq   rx   ry   rr   r   r   r   r9     s    

zHMMSearchJob.get_invocationc                 C   s&   d| j | j| j| j| jrdn| jf S )Nz5model:%s, fragments:%s, elim:%s, filter:%s, output:%sZPiped)rx   ry   r   r   r   r^   r   r   r   r   r3     s       zHMMSearchJob.characterize_inputc              	   C   s   | j r| jri }nNtj| js$tt| jtj dks>tt	| jd}| 
|}W 5 Q R X t	| jd}|t| W 5 Q R X | jS | jri }n<| jr| jd}| 
|}nt	| jd}| 
|}|  |S dS )z
           Reads the search output file and returns a dictionary that contains
           the e-values of the searched fragments
        r   rH   r#   r.   N)r   r   r;   r   r<   r^   r4   rt   ru   r8   read_results_from_tempwriter(   r   r   rq   close)r   resr^   targetr   r   r   r0     s&    
zHMMSearchJob.read_resultsc                 C   s   i }t d}d}|D ]}| }|s:|ddkr:d}q|rP|dkrPd} qq|r||}|d k	r|dddkrt|d	 t|d
 f||d < q|S )Nz`([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)\s+([^\s]+)FzE-valueTrN   r   z--      	   )recompiler   
startswithsearchgroupfindfloat)r   r^   ZresultspatternZstart_readinglineZmatchesr   r   r   r     s&    
z#HMMSearchJob.read_results_from_temp)NT)NT)
rU   rV   rW   r	   re   r}   r9   r3   r0   r   r   r   r   r   r   q  s     

   
 r   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )
PplacerJobc                 K   sF   d| _ tj| | j f| d | _d | _d | _d | _d | _d | _d | _	d S )NZpplacer)
r
   r   r	   setup_setting	tree_filebackbone_alignment_file	info_fileextended_alignment_filefull_extended_alignment_fileout_filera   r   r   r   r	     s    zPplacerJob.__init__c                 K   s4   || _ || _|| _|| _|| _|| _|| _d| _d S )NFile:TrInEx)r   r   r   r   r   r   r   r   )r   r   r   r   r   r   r|   r   r   r   r   re     s    zPplacerJob.setupc                 K   s   t |tjjsttjd|d| _tjd|d| _tjd| |d| _	tjd| |d| _
tjtj|| j	dd| _t |jtst|j| j t|d	r|jn|| _|| _d
| _dS )a   Automatically sets up a job given a subproblem object.
        Note that extended alignment and the backbone_alignment_file are just
        file names, referring to empty files at this point. These files needs
        to be created before the job is queued.
        zpplacer.backbone.rf   zpplacer.tree.z.trezpplacer.extended.%d.zpplacer.full.extended.%d.rb   ZjplacerJ   r   N)r7   r   rg   rh   r4   ri   rj   r   r   r   r   r;   r   r=   Ztempdir_for_subproblemreplacer   Zsubtreer   Zwrite_newick_to_pathr   rJ   r   r   r   )r   rk   r   ir   r   r   r   r}     sD           
z'PplacerJob.partial_setup_for_subproblemc                 C   sj   | j dtj | jg}d| jkr4|| jd   | jdkrf|ddd| jd| j	d| j
d	d
| jg |S )Nz	--out-dirr   r   -jrn   z-rz-sz-tz--groupsZ10)r   r;   dirnamer   r   rp   rq   r   r   r   r   r   rr   r   r   r   r9   (  s&     

    zPplacerJob.get_invocationc                 C   s.   | j dkr&d| j| j| j| j| jf S dS d S )Nr   zXbackbone_alignment_file:%s, tree_file:%s, info_file:%s, extended alignment:%s, output:%szNot setup properly)r   r   r   r   r   r   r   r   r   r   r3   7  s    
  zPplacerJob.characterize_inputc                 C   s<   | j r
dS tj| jstt| jtj dks6t| jS )a5  
        Since the output file can be huge, we don't want to read it here,
        because it will need to get pickled and unpickled. Instead, we just
        send back the file name, and will let the caller figure out what to do
        with it.

        But if it is a fake job, then return nothing
        Nr   )r   r;   r   r<   r   r4   rt   ru   r   r   r   r   r0   A  s
    	zPplacerJob.read_resultsNr   r   r   r   r   r     s   
r   c                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )MergeJsonJobc                 K   s(   d| _ tj| | j f| d | _d | _d S )NZ
jsonmerger)r
   r   r	   Zinput_stringr   ra   r   r   r   r	   R  s    zMergeJsonJob.__init__c                 K   s   || _ || _|| _d S r   )r   r   r   )r   ZinStringr|   r   r   r   r   re   X  s    zMergeJsonJob.setupc                 C   s   dd| j dd| jg}|S )Njavaz-jar-)r   r   rr   r   r   r   r9   ]  s      zMergeJsonJob.get_invocationc                 C   s
   d| j  S )Nzinput:pipe output:%s)r   r   r   r   r   r3   b  s    zMergeJsonJob.characterize_inputc                 C   s2   t j| jstt | jtj dks,t| jS )r   r   )r;   r   r<   r   r4   rt   ru   r   r   r   r   r0   e  s    zMergeJsonJob.read_resultsN)rU   rV   rW   r	   re   r9   r3   r0   r   r   r   r   r   Q  s
   r   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )MafftAlignJobz8
    The Job class that generates a Mafft alignment
    c                 K   s4   d| _ tj| | j f| d | _d | _d | _d| _d S )NZmafftr   )r
   r   r	   	sequencessizeoutputthreadsra   r   r   r   r	   v  s    zMafftAlignJob.__init__c                 K   s"   || _ || _|| _|| _|| _dS rd   N)r   r   r   r   r   )r   r   r   r   r   r   r   r   r   re   ~  s
    zMafftAlignJob.setupc                 C   s   dS )z=
        Use setup for generating backbone alignment
        Nr   r   r   r   r   rl     s    z"MafftAlignJob.setup_for_subproblemc                 C   sV   | j g}| jdkr(|dddddg n|dddg |d	d
t| j| jg |S )N   z
--parttreez--retree2z
--partsizeZ1000z--localpairz--maxiteratez--anysymbolz--thread)r   r   rp   r(   r   r   rr   r   r   r   r9     s    
zMafftAlignJob.get_invocationc                 C   s   d| j | jf S )Nzmafftalign %s %s)r   r   r   r   r   r   r3     s    z MafftAlignJob.characterize_inputc                 C   s*   | j }t| jd}|| |  | jS )z7
        Read from standard out, write to file
        r#   )r   r8   r   r   r   )r   r   r^   r   r   r   r0     s
    
zMafftAlignJob.read_resultsNrv   r   r   r   r   r   q  s   
r   c                   @   s@   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dS )FastTreeJobzH
    The Job class that generates a fasttree tree from an alignment
    c                 K   s.   d| _ tj| | j f| d | _d | _d | _d S )NZfasttree)r
   r   r	   alignment_filer|   r_   ra   r   r   r   r	     s
    zFastTreeJob.__init__c                 K   s   || _ || _|| _|| _dS r   )r   r|   r_   r   )r   r   r|   r_   r   r   r   r   re     s    zFastTreeJob.setupc                 C   s   dS z8
        Use setup for generating backbone tree
        Nr   r   r   r   r   rl     s    z FastTreeJob.setup_for_subproblemc                 C   s<   | j dg}| jdkr |dg |ddd| j| jg |S )Nz-gtrproteinz-ntz-quietz
-nosupportz-out)r   r_   rp   r|   r   rr   r   r   r   r9     s    

zFastTreeJob.get_invocationc                 C   s   d| j | j| jf S )Nzfasttree %s %s %s)r   r|   r_   r   r   r   r   r3     s
      zFastTreeJob.characterize_inputc                 C   s2   t j| jstt | jtj dks,t| jS )z'
        Check and return file
        r   )r;   r   r<   r|   r4   rt   ru   r   r   r   r   r0     s    zFastTreeJob.read_resultsNrv   r   r   r   r   r     s   
r   c                   @   sB   e Zd ZdZdd Zdd ZdddZd	d
 Zdd Zdd Z	dS )PastaAlignJobzA
    The Job class that generates a Pasta alignment and tree
    c                 K   s@   d| _ tj| | j f| d | _d | _d | _d | _d | _d | _d S )NZpasta	r
   r   r	   	alignmentr   r_   r   r   r   ra   r   r   r   r	     s    zPastaAlignJob.__init__c                 K   s4   || _ || _|| _|dkr$| jdk || _|| _dS z
        Use this to setup the job if you already have input file written to
        a file. Use setup_for_subproblem when possible.
        Zaar   N)r   r   r_   r   r   )r   r   r   r_   r   r   r   r   r   re     s    
zPastaAlignJob.setuprc   c                 K   s   dS r   r   r   rk   r_   r   r   r   r   rl     s    z"PastaAlignJob.setup_for_subproblemc                 C   s>   | j d| j d| jd| j dtj  dddtj  g	}|S )N--num-cpus=%d-i--datatype=%sz--temporaries=%s/pastaout/r   Zpastajobz--output-directory=%s/pastaout/)r   r   r   r_   r   ri   get_root_temp_dirrr   r   r   r   r9     s       zPastaAlignJob.get_invocationc                 C   s   d |  S Nr'   r=   r9   r   r   r   r   r3     s    z PastaAlignJob.characterize_inputc                 C   s   t jdtj  stt dtj  tj dks<tt	dtj  d}t
d}t
d}d}d}|D ]H}| }||}t|dkr|d }||}t|dkrp|d }qp||fS )zR
        Read the PASTA log file and get the alignment and tree from file
        z%s/pastaout/pastajob.out.txtr   rH   #Writing resulting alignment to (.*)Writing resulting tree to (.*)rN   )r;   r   r<   r   ri   r   r4   rt   ru   r8   r   r   r   findallrB   r   r^   Zalignment_patternZtree_patternr   r   r   r/   r   r   r   r0     s:    




zPastaAlignJob.read_resultsN)rc   rv   r   r   r   r   r     s   

	r   c                   @   sB   e Zd ZdZdd Zdd ZdddZd	d
 Zdd Zdd Z	dS )SateAlignJobz@
    The Job class that generates a Sate alignment and tree
    c                 K   s@   d| _ tj| | j f| d | _d | _d | _d | _d | _d | _d S )NZsater   ra   r   r   r   r	   #  s    zSateAlignJob.__init__c                 K   s:   || _ || _|| _|| _|dkr*| jdk || _|| _dS r   )r   r   r   r_   r   r   )r   r   r   r   r_   r   r   r   r   r   re   -  s    
zSateAlignJob.setuprc   c                 K   s   dS r   r   r   r   r   r   rl   ;  s    z!SateAlignJob.setup_for_subproblemc                 C   sp   d}| j dkrd}d}| jdkr$d}| jd| jd| d	d
d| j d| j d| j d|dddddtj  g}|S )Nz--max-subproblem-size=200r   z--max-subproblem-frac=0.50ZopalZaminoZmuscler   z--merger=%sz--aligner=mafftz--tree-estimator=fasttreer   r   z--temporaries=%s/satetmpz--break-strategy=centroidz--time-limit=-1z--iter-without-imp-limit=1r   Zsatejobz--output-directory=%s/sateout/)	r   r_   r   r   r   r   r   ri   r   )r   Zsize_strZ
mergerTypers   r   r   r   r9   A  s.    

    
zSateAlignJob.get_invocationc                 C   s   d |  S r   r   r   r   r   r   r3   T  s    zSateAlignJob.characterize_inputc                 C   s   t jdtj  stt dtj  tj dks<tt	dtj  d}t
d}t
d}d}d}|D ]H}| }||}t|dkr|d }||}t|dkrp|d }qpt|d| j  t|d| j  ||fS )	zs
        Read the Sate log file and get the alignment and tree from file,
        copy to output directory
        z%s/sateout/satejob.out.txtr   rH   r   r   rN   z%s/sate.fasttreez%s/sate.fasta)r;   r   r<   r   ri   r   r4   rt   ru   r8   r   r   r   r   rB   shutilZcopyfiler   r   r   r   r   r0   W  sB    




zSateAlignJob.read_resultsN)rc   rv   r   r   r   r   r     s   

r   )#rX   Zsepp.schedulerr   r   r   r   abcr   r5   r   r;   r   rt   r   Z	sepp.treer   Zsepp.configrE   ioIOBasefilerI   	NameErrorrU   r1   r   r[   rw   r   r   r   r   r   r   r   r   r   r   r   <module>   s:    :PK{e 60H