U
    H$xe%:                     @   sn  d Z ddlmZmZ ddlmZmZmZmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ ee
e
f ZG d	d
 d
eZG dd deZG dd deZG dd deZG dd deeZG dd deeZG dd deZG dd deZG dd deZG dd deZG dd deeZG dd  d eeZG d!d" d"eeeZG d#d$ d$eeeZG d%d& d&eeZ d'S )(a!  
Steps of the read output pipeline

After all read modifications have been done, a read is written to at
most one output file. For this, a pipeline represented as a list of "steps"
(SingleEndSteps or PairedEndSteps) is used. Each pipeline step can consume
(discard) a read or pass it on to the next step.

Steps are added to the pipeline in a certain order:

1. First RestFileWriter, InfoFileWriter, WildcardFileWriter because
   they should see all reads before filtering.
2. Filters come next. These are implemented as SingleEndFilter or PairedEndFilter
   instances with an appropriate Predicate. Filters can optionally send each
   consumed/filtered read to an output file.
3. The last pipeline step should be one of the "Sinks", which consume all reads.
   Demultiplexers are sinks, for example.
    )ABCabstractmethod)TupleDictOptionalAny)SequenceRecord   )	Predicate)ModificationInfo)ReadLengthStatisticsc                   @   s$   e Zd Zeeee dddZdS )SingleEndStepinforeturnc                 C   s   dS )z
        Process a single read. Return the processed read or None to indicate that
        the read has been consumed and should thus not be passed on to subsequent
        steps.
        N selfreadr   r   r   -lib/python3.8/site-packages/cutadapt/steps.py__call__!   s    zSingleEndStep.__call__N)__name__
__module____qualname__r   r   r   r   r   r   r   r   r   r       s   r   c                   @   s&   e Zd Zeeeee dddZdS )PairedEndStepinfo1info2r   c                 C   s   dS )z
        Process (read1, read2). Return the processed read pair or None if
        the read pair has been "consumed" (filtered or written to an output file)
        and should thus not be passed on to subsequent steps.
        Nr   r   read1read2r   r   r   r   r   r   +   s    zPairedEndStep.__call__N)r   r   r   r   r   r   
RecordPairr   r   r   r   r   r   *   s
    r   c                   @   s"   e Zd ZdZeedddZdS )HasStatisticsz^
    Used for the final steps (sinks), which need to keep track of read length statistics
    r   c                 C   s   d S Nr   r   r   r   r   get_statistics;   s    zHasStatistics.get_statisticsN)r   r   r   __doc__r   r   r&   r   r   r   r   r"   6   s   r"   c                   @   s0   e Zd ZeedddZeedddZdS )HasFilterStatisticsr#   c                 C   s   dS )z-Return number of filtered reads or read pairsNr   r%   r   r   r   filteredA   s    zHasFilterStatistics.filteredc                 C   s   dS )zName used in statisticsNr   r%   r   r   r   descriptive_identifierE   s    z*HasFilterStatistics.descriptive_identifierN)r   r   r   r   intr)   strr*   r   r   r   r   r(   @   s   r(   c                       s^   e Zd ZdZed fddZdd Zeddd	Ze	dd
dZ
eee dddZ  ZS )SingleEndFilterz
    A pipeline step that can filter reads, can redirect filtered ones to a writer, and
    counts how many were filtered.
    )	predicatec                    s    t    d| _|| _|| _d S Nr   )super__init__	_filtered
_predicate_writer)r   r.   writer	__class__r   r   r1   P   s    
zSingleEndFilter.__init__c                 C   s   d| j  d| j dS )NzSingleEndFilter(predicate=	, writer=))r3   r4   r%   r   r   r   __repr__V   s    zSingleEndFilter.__repr__r#   c                 C   s
   | j  S r$   )r3   r*   r%   r   r   r   r*   Y   s    z&SingleEndFilter.descriptive_identifierc                 C   s   | j S r$   r2   r%   r   r   r   r)   \   s    zSingleEndFilter.filteredr   c                 C   s:   | j ||r6|  jd7  _| jd k	r2| j| d S |S Nr	   )r3   testr2   r4   writer   r   r   r   r   _   s    
zSingleEndFilter.__call__)r   r   r   r'   r
   r1   r:   r,   r*   r+   r)   r   r   r   r   __classcell__r   r   r6   r   r-   J   s   r-   c                       s   e Zd ZdZdee ee d fddZdd Zedd	d
Z	e
dddZeeedddZeeedddZeeedddZeeedddZeeee dddZ  ZS )PairedEndFiltera  
    A pipeline step that can filter paired-end reads, redirect them to a file, and counts
    how many read pairs were filtered.

    Different filtering styles are supported, differing by which of the
    two reads in a pair have to fulfill the filtering criterion.
    any)
predicate1
predicate2c                    s   t    |dkrtd|| _d| _|| _|| _|| _|  |dkrN| j| _	n>|dkr`| j
| _	n,|dkrr| j| _	n|dkr| j| _	n| j| _	dS )a  
        pair_filter_mode -- these values are allowed:
            'any': The pair is discarded if any read matches.
            'both': The pair is discarded if both reads match.
            'first': The pair is discarded if the first read matches.
        )rA   bothfirstz1pair_filter_mode must be 'any', 'both' or 'first'r   NrA   rD   )r0   r1   
ValueError_pair_filter_moder2   rB   rC   r5   _is_filtered_first_is_filtered_is_filtered_second_is_filtered_any_is_filtered_both)r   rB   rC   r5   Zpair_filter_moder6   r   r   r1   q   s$    




zPairedEndFilter.__init__c              	   C   s&   d| j  d| j d| j d| j d	S )NzPairedEndFilter(predicate1=z, predicate2=r8   z, pair_filter_mode='z'))rB   rC   r5   rG   r%   r   r   r   r:      s    $zPairedEndFilter.__repr__r#   c                 C   s0   | j d k	r| j  S | jd k	s"t| j S d S r$   )rB   r*   rC   AssertionErrorr%   r   r   r   r*      s    

z&PairedEndFilter.descriptive_identifierc                 C   s   | j S r$   r;   r%   r   r   r   r)      s    zPairedEndFilter.filteredr   c                 C   s   | j ||p| j||S r$   rB   r=   rC   r   r   r   r   rK      s    z PairedEndFilter._is_filtered_anyc                 C   s   | j ||o| j||S r$   rN   r   r   r   r   rL      s    z!PairedEndFilter._is_filtered_bothc                 C   s   | j ||S r$   )rB   r=   r   r   r   r   rH      s    z"PairedEndFilter._is_filtered_firstc                 C   s   | j ||S r$   )rC   r=   r   r   r   r   rJ      s    z#PairedEndFilter._is_filtered_secondc                 C   sB   |  ||||r:|  jd7  _| jd k	r6| j|| d S ||fS r<   )rI   r2   r5   r>   r   r   r   r   r      s    
zPairedEndFilter.__call__)rA   )r   r   r   r'   r   r
   r1   r:   r,   r*   r+   r)   r   boolrK   rL   rH   rJ   r!   r   r?   r   r   r6   r   r@   h   s:    !     r@   c                   @   s.   e Zd Zdd Zdd Zee dddZdS )	RestFileWriterc                 C   s
   || _ d S r$   _filer   filer   r   r   r1      s    zRestFileWriter.__init__c                 C   s   d| j  dS )NzRestFileWriter(file=r9   rQ   r%   r   r   r   r:      s    zRestFileWriter.__repr__r#   c                 C   s6   |j r2|j d  }t|dkr2t||j| jd |S )Nr   rT   )matchesrestlenprintnamerR   )r   r   r   rX   r   r   r   r      s
    zRestFileWriter.__call__Nr   r   r   r1   r:   r   r   r   r   r   r   r   rP      s   rP   c                   @   s.   e Zd Zdd Zdd Zee dddZdS )	WildcardFileWriterc                 C   s
   || _ d S r$   rQ   rS   r   r   r   r1      s    zWildcardFileWriter.__init__c                 C   s   d| j  dS )NzWildcardFileWriter(file=r9   rQ   r%   r   r   r   r:      s    zWildcardFileWriter.__repr__r#   c                 C   s&   |j r"t|j d  |j| jd |S )NrU   rV   )rW   rZ   Z	wildcardsr[   rR   r   r   r   r   r      s    zWildcardFileWriter.__call__Nr\   r   r   r   r   r]      s   r]   c                   @   s<   e Zd ZddddZdd Zdd Zeee d	d
dZ	dS )InfoFileWriter 10)NTFc                 C   s
   || _ d S r$   rQ   rS   r   r   r   r1      s    zInfoFileWriter.__init__c                 C   s   d| j  dS )NzInfoFileWriter(file=r9   rQ   r%   r   r   r   r:      s    zInfoFileWriter.__repr__r   c                 C   s   |j }|jr| }|jrx|jD ]T}||D ]:}t|j|d  f|dd  | j|j fd| jd q.|	|}q n2|j
}|jd k	r|jnd}t|jd||d| jd |S )Nr   r	   	)seprT   r_   rU   )Zoriginal_readZis_rcZreverse_complementrW   Zget_info_recordsrZ   r[   RC_MAPrR   ZtrimmedZsequence	qualities)r   r   r   Zcurrent_readmatchZinfo_recordseqre   r   r   r   r      s(    



zInfoFileWriter.__call__N)
r   r   r   rd   r1   r:   r   r   r   r   r   r   r   r   r^      s   r^   c                   @   s8   e Zd ZdZedddZdd Zee ddd	Z	d
S )PairedSingleEndStepze
    Wrap a SingleEndStep as a PairedEndStep

    The wrapped step is called with the first read
    )stepc                 C   s
   || _ d S r$   Z_step)r   ri   r   r   r   r1     s    zPairedSingleEndStep.__init__c                 C   s   d| j  dS )NzPairedSingleEndStep(step=r9   rj   r%   r   r   r   r:   
  s    zPairedSingleEndStep.__repr__r#   c                 C   s(   |}|}|  ||}|d kr d S ||fS r$   rj   )r   r   r    r   r   _resultr   r   r   r     s    zPairedSingleEndStep.__call__N)
r   r   r   r'   r   r1   r:   r   r!   r   r   r   r   r   rh      s   rh   c                       sJ   e Zd ZdZ fddZdd Zeee dddZ	e
d	d
dZ  ZS )SingleEndSinkzv
    Send each read to a writer and keep read length statistics.
    This is used as the last step in a pipeline.
    c                    s   t    || _t | _d S r$   r0   r1   r5   r   _statisticsr   r5   r6   r   r   r1   !  s    
zSingleEndSink.__init__c                 C   s   d| j  dS )Nz	NoFilter(r9   r5   r%   r   r   r   r:   &  s    zSingleEndSink.__repr__r   c                 C   s   | j | | j| d S r$   )r5   r>   ro   updater   r   r   r   r   )  s    zSingleEndSink.__call__r#   c                 C   s   | j S r$   ro   r%   r   r   r   r&   .  s    zSingleEndSink.get_statistics)r   r   r   r'   r1   r:   r   r   r   r   r   r&   r?   r   r   r6   r   rm     s
   rm   c                       sL   e Zd ZdZ fddZdd Zeeee dddZ	e
d	d
dZ  ZS )PairedEndSinkz{
    Send each read pair to a writer and keep read length statistics.
    This is used as the last step in a pipeline.
    c                    s   t    || _t | _d S r$   rn   rp   r6   r   r   r1   8  s    
zPairedEndSink.__init__c                 C   s   d| j  dS )NzPairedNoFilter(r9   rq   r%   r   r   r   r:   =  s    zPairedEndSink.__repr__r   c                 C   s    | j || | j|| d S r$   )r5   r>   ro   update2r   r   r   r   r   @  s    zPairedEndSink.__call__r#   c                 C   s   | j S r$   rs   r%   r   r   r   r&   G  s    zPairedEndSink.get_statistics)r   r   r   r'   r1   r:   r   r   r!   r   r   r&   r?   r   r   r6   r   rt   2  s    rt   c                       sv   e Zd ZdZeee ef d fddZdd Z	ee
 ddd	Zedd
dZedddZedddZ  ZS )Demultiplexerz
    Demultiplex trimmed reads. Reads are written to different output files
    depending on which adapter matches.

    Untrimmed reads are sent to writers[None] if that key exists.
    writersc                    s2   t    || _| jdd| _t | _d| _dS )z:
        writers maps an adapter name to a writer
        Nr   r0   r1   _writersget_untrimmed_writerr   ro   r2   r   rx   r6   r   r   r1   S  s
    
zDemultiplexer.__init__c                 C   s   dt | j dS )Nz<Demultiplexer len(writers)=>)rY   rz   r%   r   r   r   r:   ]  s    zDemultiplexer.__repr__r#   c                 C   sh   |j r2|j d jj}| j| | j| | n2| jdk	rV| j| | j| n|  jd7  _dS )z]
        Write the read to the proper output file according to the most recent match
        rU   Nr	   )	rW   adapterr[   ro   rr   rz   r>   r|   r2   )r   r   r   r[   r   r   r   r   `  s    
zDemultiplexer.__call__c                 C   s   dS NZdiscard_untrimmedr   r%   r   r   r   r*   o  s    z$Demultiplexer.descriptive_identifierc                 C   s   | j S r$   rs   r%   r   r   r   r&   r  s    zDemultiplexer.get_statisticsc                 C   s   | j S r$   r;   r%   r   r   r   r)   u  s    zDemultiplexer.filtered)r   r   r   r'   r   r   r,   r   r1   r:   r   r   r*   r   r&   r+   r)   r?   r   r   r6   r   rv   K  s   
rv   c                       sr   e Zd ZdZeee ef d fddZe	e	ee
 dddZedd	d
ZedddZedddZ  ZS )PairedDemultiplexerz
    Demultiplex trimmed paired-end reads. Reads are written to different output files
    depending on which adapter (in read 1) matches.
    rw   c                    s2   t    || _| jd d | _t | _d| _d S r/   ry   r}   r6   r   r   r1     s
    
zPairedDemultiplexer.__init__r   c                 C   s|   |d k	st |jrB|jd jj}| j|| | j| || n6| jd k	rj| j|| | j|| n|  j	d7  _	d S )NrU   r	   )
rM   rW   r   r[   ro   ru   rz   r>   r|   r2   )r   r   r    r   r   r[   r   r   r   r     s    
zPairedDemultiplexer.__call__r#   c                 C   s   dS r   r   r%   r   r   r   r*     s    z*PairedDemultiplexer.descriptive_identifierc                 C   s   | j S r$   rs   r%   r   r   r   r&     s    z"PairedDemultiplexer.get_statisticsc                 C   s   | j S r$   r;   r%   r   r   r   r)     s    zPairedDemultiplexer.filtered)r   r   r   r'   r   r   r,   r   r1   r   r!   r   r*   r   r&   r+   r)   r?   r   r   r6   r   r   y  s    r   c                       s^   e Zd ZdZeeee ee f ef d fddZ	ee
 dddZeddd	Z  ZS )
CombinatorialDemultiplexerzx
    Demultiplex paired-end reads depending on which adapter matches, taking into account
    matches on R1 and R2.
    rw   c                    s   t    || _t | _dS )a&  
        Adapter names of the matches on R1 and R2 will be used to look up the writer in the
        writers dict. If there is no match on a read, None is used in the lookup instead
        of the name. Missing dictionary keys are ignored and can be used to discard
        read pairs.
        N)r0   r1   rz   r   ro   r}   r6   r   r   r1     s    
z#CombinatorialDemultiplexer.__init__r#   c                 C   sr   |dk	st |jr |jd jjnd}|jr8|jd jjnd}||f}|| jkrn| j|| | j| || dS )zy
        Write the read to the proper output file according to the most recent matches both on
        R1 and R2
        NrU   )rM   rW   r   r[   rz   ro   ru   r>   )r   r   r    r   r   Zname1Zname2keyr   r   r   r     s    
z#CombinatorialDemultiplexer.__call__c                 C   s   | j S r$   rs   r%   r   r   r   r&     s    z)CombinatorialDemultiplexer.get_statistics)r   r   r   r'   r   r   r   r,   r   r1   r!   r   r   r&   r?   r   r   r6   r   r     s   *r   N)!r'   abcr   r   typingr   r   r   r   Zdnaior   Z
predicatesr
   Z	modifiersr   Z
statisticsr   r!   r   r   r"   r(   r-   r@   rP   r]   r^   rh   rm   rt   rv   r   r   r   r   r   r   <module>   s,   


Y!.&