U
    H$xeI                     @   s$  d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
mZmZ ddlmZmZ ddlmZmZ dd	lmZmZmZ dd
lmZmZmZmZmZmZmZ ddlm Z m!Z!m"Z"m#Z# ddl$m%Z% e& Z'dZ(G dd deZ)G dd deZ*G dd de*Z+G dd de)Z,G dd de)Z-G dd de*Z.G dd de/Z0G dd de*Z1G dd de)Z2G d d! d!e)Z3G d"d# d#e)Z4G d$d% d%e)Z5G d&d' d'e/Z6G d(d) d)e)Z7G d*d+ d+e*Z8G d,d- d-e)Z9G d.d/ d/e)Z:G d0d1 d1e)Z;G d2d3 d3e)Z<G d4d5 d5e)Z=G d6d7 d7e)Z>dS )8z
This module implements all the read modifications that cutadapt supports.
A modifier must be callable and typically implemented as a class with a
__call__ method.
    N)defaultdict)SimpleNamespace)SequenceListTupleOptionalSet)ABCabstractmethod)record_names_matchSequenceRecord   )quality_trim_indexnextseq_trim_indexpoly_a_trim_index)MultipleAdaptersSingleAdapterIndexedPrefixAdaptersIndexedSuffixAdaptersMatch	remainderAdapter)tokenize_bracesTokenizeErrorToken
BraceToken)ModificationInfo   c                   @   s    e Zd ZeeedddZdS )SingleEndModifierreadinfoc                 C   s   d S N selfr    r!   r#   r#   1lib/python3.8/site-packages/cutadapt/modifiers.py__call__$   s    zSingleEndModifier.__call__N)__name__
__module____qualname__r
   r   r   r'   r#   r#   r#   r&   r   #   s   r   c                   @   s.   e Zd Zeeeeeeeef dddZdS )PairedEndModifierread1read2info1info2returnc                 C   s   d S r"   r#   r%   r-   r.   r/   r0   r#   r#   r&   r'   *   s    zPairedEndModifier.__call__N)r(   r)   r*   r
   r   r   r   r'   r#   r#   r#   r&   r+   )   s   
r+   c                   @   sD   e Zd ZdZdZee ee dddZdd Ze	e	dd	d
Z
dS )PairedEndModifierWrapperzR
    Wrap two SingleEndModifiers that work on both reads in a paired-end read
    T)	modifier1	modifier2c                 C   s,   || _ || _| j dkr(| jdkr(tddS )z9Set one of the modifiers to None to work on R1 or R2 onlyNzNot both modifiers may be None)
_modifier1
_modifier2
ValueError)r%   r4   r5   r#   r#   r&   __init__<   s    z!PairedEndModifierWrapper.__init__c                 C   s   d| j d| jdS )Nz#PairedEndModifierWrapper(modifier1=z, modifier2=)r6   r7   r%   r#   r#   r&   __repr__G   s    z!PairedEndModifierWrapper.__repr__)r/   r0   c                 C   sL   | j d kr|| ||fS | jd kr4|  |||fS |  ||| ||fS r"   r;   r2   r#   r#   r&   r'   M   s
    

z!PairedEndModifierWrapper.__call__N)r(   r)   r*   __doc__Zpairedr   r   r9   r=   r   r'   r#   r#   r#   r&   r3   5   s   r3   c                   @   s   e Zd ZdZdee eee e	dddZ
dd	 Zd
d Zeee eee ee ee f dddZeee dddZeee dddZeee dddZedddZdd Zdd ZdS )AdapterCuttera	  
    Repeatedly find one of multiple adapters in reads.

    Arguments:
        adapters: Adapters to be searched
        times: Repeat the search this number of times.
        action: What to do with a found adapter.
            - *None*: Do nothing, only update the ModificationInfo appropriately
            - "trim": Remove the adapter and down- or upstream sequence depending on adapter type
            - "mask": Replace the part of the sequence that would have been removed with "N" bases
            - "lowercase": Convert the part of the sequence that would have been removed to lowercase
            - "retain": Like "trim", but leave the adapter sequence itself in the read
        index: If True, attempt to create an index to speed up the search (if possible)
    r   trimT)adapterstimesactionindexc                 C   s   || _ |dkst|| _d| _dd |D | _|rDt| || _n
t|| _|dkrf|dkrftd| j dkr| jdkr| j	| _
d S )	N)r@   mask	lowercaseretainNr   c                 S   s   i | ]}||  qS r#   Zcreate_statistics.0ar#   r#   r&   
<dictcomp>p   s      z*AdapterCutter.__init__.<locals>.<dictcomp>rG   r   z*'retain' cannot be combined with times > 1r@   )rB   AssertionErrorrC   with_adaptersadapter_statisticsr   _regroup_into_indexed_adaptersrA   r8    _match_and_trim_once_action_trimmatch_and_trim)r%   rA   rB   rC   rD   r#   r#   r&   r9   e   s    
zAdapterCutter.__init__c                 C   s   d| j d| j d| j dS )NzAdapterCutter(adapters=z, times=z
, action=''))rA   rB   rC   r<   r#   r#   r&   r=   |   s    zAdapterCutter.__repr__c                 C   s   |  |\}}}t|tks(t|tkr||}t|dkrH|t| n
|| t|dkrn|t| n
|| |S |S d S Nr   )_split_adapterslenINDEXING_THRESHOLDappendr   extendr   )r%   rA   prefixsuffixZsingleresultr#   r#   r&   rP      s    

z,AdapterCutter._regroup_into_indexed_adapters)rA   r1   c                 C   sV   g }g }g }| D ]:}t |r*|| qt|r@|| q|| q|||fS )a  
        Split adapters into three different categories so that they can possibly be used
        with a MultiAdapter. Return a tuple (prefix, suffix, other), where
        - prefix is a list of all anchored 5' adapters that MultiAdapter would accept
        - suffix is a list of all anchored 3' adapters that MultiAdapter would accept
        - other is a list of all remaining adapters.
        )r   Zis_acceptablerX   r   )rA   rZ   r[   otherrK   r#   r#   r&   rU      s    

zAdapterCutter._split_adapters)matchesc                 C   s   |d   \}}| || S )N)Zretained_adapter_interval)r    r^   startstopr#   r#   r&   trim_but_retain_adapter   s    z%AdapterCutter.trim_but_retain_adapterc                 C   sD   t |\}}| d d  }d| | j||  dt| |   |_|S )NN)r   sequencerV   r    r^   r`   ra   r\   r#   r#   r&   masked_read   s
    $zAdapterCutter.masked_readc                 C   sT   t |\}}| d d  }| jd |  | j||   | j|d    |_|S r"   )r   rd   lowerupperre   r#   r#   r&   lowercased_read   s    zAdapterCutter.lowercased_readr!   c                 C   sL   |  |\}}|r<|  jd7  _|D ]}| j|j | q$|j| |S rT   )rR   rN   rO   adapter	add_matchr^   rY   )r%   r    r!   trimmed_readr^   matchr#   r#   r&   r'      s    zAdapterCutter.__call__c                 C   s   g }| j dkr|j |_|}t| jD ]2}| j|j}|dkrF q\|| ||}q(|sh|g fS | j dkrtnt| j dkr| 	||}n\| j dkr| 
||}nD| j dkr| ||}t|jt|kstn| j dkr|dd }||fS )a  
        Search for the best-matching adapter in a read, perform the requested action
        ('trim', 'mask' etc. as determined by self.action) and return the
        (possibly) modified read.

        *self.times* adapter removal rounds are done. During each round,
        only the best-matching adapter is trimmed. If no adapter was found in a round,
        no further rounds are attempted.

        Return a pair (trimmed_read, matches), where matches is a list of Match instances.
        rF   Nr@   rG   rE   )rC   rd   rh   rangerB   rA   match_torX   trimmedrb   rf   ri   rV   rM   )r%   r    r^   rm   _rn   r#   r#   r&   rR      s0    






zAdapterCutter.match_and_trimc                 C   s2   | j |j}|dk	r&|||gfS |g fS dS )zm
        Specalization of match_and_trim for the case that self.times == 1 and self.action == 'trim'
        N)rA   rp   rd   rq   )r%   r    rn   r#   r#   r&   rQ      s    z.AdapterCutter._match_and_trim_once_action_trimN)r   r@   T)r(   r)   r*   r>   r   r   intr   strboolr9   r=   rP   staticmethodr   r   rU   r   rb   rf   ri   r   r'   rR   rQ   r#   r#   r#   r&   r?   U   s6      
	*r?   c                   @   s>   e Zd ZdZdeee dddZdd Ze	e
dd	d
ZdS )ReverseComplementerz4Trim adapters from a read and its reverse complement rc)adapter_cutter	rc_suffixc                 C   s   || _ d| _|| _dS zb
        rc_suffix -- suffix to add to the read name if sequence was reverse-complemented
        r   N)ry   reverse_complemented_suffix)r%   ry   rz   r#   r#   r&   r9     s    zReverseComplementer.__init__c                 C   s   d| j  dS )Nz#ReverseComplementer(adapter_cutter=r:   )ry   r<   r#   r#   r&   r=     s    zReverseComplementer.__repr__r   c                 C   s   |  }| j|\}}| j|\}}tdd |D }tdd |D }	|	|k}
|
r|  jd7  _|snt|| }}d|_| jr| j| j7  _nd|_|| }}|r| j j	d7  _	|D ].}| jj
|j }|| | jt|
7  _q|j| |S )Nc                 s   s   | ]}|j V  qd S r"   scorerJ   mr#   r#   r&   	<genexpr>  s     z/ReverseComplementer.__call__.<locals>.<genexpr>c                 s   s   | ]}|j V  qd S r"   r~   r   r#   r#   r&   r     s     r   TF)Zreverse_complementry   rR   sumr|   rM   is_rcr}   namerN   rO   rk   rl   ru   r^   rY   )r%   r    r!   Zreverse_readZforward_trimmed_readZforward_matchesZreverse_trimmed_readZreverse_matchesZforward_scoreZreverse_scoreuse_reverse_complementrm   r^   rn   statsr#   r#   r&   r'     s2    


zReverseComplementer.__call__N)rx   )r(   r)   r*   r>   r?   r   rt   r9   r=   r   r   r'   r#   r#   r#   r&   rw     s   rw   c                   @   sL   e Zd ZdZdee ee ee dddZdd Ze	e	e
e
dd	d
ZdS )PairedReverseComplementerzHTrim adapters from a read pair or its reverse complement (R1/R2 swapped)rx   )adapter_cutter1adapter_cutter2rz   c                 C   s   || _ || _d| _|| _dS r{   )r   r   r|   r}   )r%   r   r   rz   r#   r#   r&   r9   6  s    	z"PairedReverseComplementer.__init__c                 C   s   d| j  d| j dS )Nz*PairedReverseComplementer(adapter_cutter1=z, adapter_cutter2=r:   )r   r   r<   r#   r#   r&   r=   D  s    z"PairedReverseComplementer.__repr__)r1r2r/   r0   c                 C   s  | j d k	r| j |\}}n
|g  }}| jd k	rB| j|\}}n
|g  }}tdd |D tdd |D  }	| j d k	r| j |\}
}n
|g  }
}| jd k	r| j|\}}n
|g  }}tdd |D tdd |D  }||	k}|rB|  jd7  _|
}|}|}|}d |_|_| jrN| j| j7  _| j| j7  _nd |_|_|r| j  jd7  _|D ]0}| j j	|j
 }|| | jt|7  _qh|j| |r| j jd7  _|D ]0}| jj	|j
 }|| | jt|7  _q|j| ||fS )Nc                 s   s   | ]}|j V  qd S r"   r~   r   r#   r#   r&   r   Z  s     z5PairedReverseComplementer.__call__.<locals>.<genexpr>c                 s   s   | ]}|j V  qd S r"   r~   r   r#   r#   r&   r   m  s     r   TF)r   rR   r   r   r|   r   r}   r   rN   rO   rk   rl   ru   r^   rY   )r%   r   r   r/   r0   Z
r1_trimmedZ
r1_matchesZ
r2_trimmedZ
r2_matchesZunswapped_scoreZr1_trimmed_swappedZr1_matches_swappedZr2_trimmed_swappedZr2_matches_swappedZswapped_scorer   rn   r   r#   r#   r&   r'   J  sj    













z"PairedReverseComplementer.__call__N)rx   )r(   r)   r*   r>   r   r?   rt   r9   r=   r   r   r'   r#   r#   r#   r&   r   3  s    r   c                   @   s   e Zd ZdS )PairedAdapterCutterErrorNr(   r)   r*   r#   r#   r#   r&   r     s   r   c                       sP   e Zd ZdZd fdd	Zdd Zdd Zeeee	e
e
f  d	d
dZ  ZS )PairedAdapterCutterz0
    Trim adapters in pairs from R1 and R2.
    r@   c              	      s   t    t|t|kr2tdt|t||s>tdtt||| _t	d | jD ]&\}}t	d|j
| |j
|  q^|| _d| _ddg| _dd |D | jd< d	d |D | jd
< dS )ag  
        adapters1 -- list of Adapters to be removed from R1
        adapters2 -- list of Adapters to be removed from R2

        Both lists must have the same, non-zero length.
         read pair is trimmed if adapters1[i] is found in R1 and adapters2[i] in R2.

        action -- What to do with a found adapter: None, 'trim', 'lowercase' or 'mask'
        z[The number of adapters to trim from R1 and R2 must be the same. Given: {} for R1, {} for R2zNo adapters givenzAdapter pairs:u    • %s=%s -- %s=%sr   Nc                 S   s   i | ]}||  qS r#   rH   rI   r#   r#   r&   rL     s      z0PairedAdapterCutter.__init__.<locals>.<dictcomp>c                 S   s   i | ]}||  qS r#   rH   rI   r#   r#   r&   rL     s      r   )superr9   rV   r   formatlistzip_adapter_pairsloggerdebugr   specrC   rN   rO   )r%   Z	adapters1Z	adapters2rC   Za1Za2	__class__r#   r&   r9     s&    

 
 
zPairedAdapterCutter.__init__c                 C   s   d| j dS )Nz"PairedAdapterCutter(adapter_pairs=r:   )r   r<   r#   r#   r&   r=     s    zPairedAdapterCutter.__repr__c                 C   sH  |  |j|j}|dkr ||fS |\}}|  jd7  _g }tddg||g||gD ]\}	}
}|}| jdkrv|j |_|
|}| j|	 |
j 	|
 | jdkrn~| jdkrt
||
g}nd| jdkrt
||
g}t|jt|kstn4| jdkrt
||
g}n| jdkr |dd }|| qR|j| |j| |S ) Nr   r   rF   r@   rE   rG   )_find_best_match_pairrd   rN   r   rC   rh   rq   rO   rk   rl   r?   rf   ri   rV   rM   rb   rX   r^   )r%   r-   r.   r/   r0   Zbest_matchesmatch1match2r\   irn   r    rm   r#   r#   r&   r'     s6    "




zPairedAdapterCutter.__call__)	sequence1	sequence2r1   c                 C   s   d }d }d }| j D ]x\}}||}|d kr.q||}	|	d krBq|j|	j }
|j|	j }|d ksz|
|ksz|
|kr||k r||	f}|
}|}q|S r"   )r   rp   r   errors)r%   r   r   ZbestZ
best_scoreZbest_errorsZadapter1Zadapter2r   r   Ztotal_scoreZtotal_errorsr#   r#   r&   r     s0    

z)PairedAdapterCutter._find_best_match_pair)r@   )r(   r)   r*   r>   r9   r=   r'   rt   r   r   r   r   __classcell__r#   r#   r   r&   r     s   " r   c                   @   s4   e Zd ZdZedddZdd Zeddd	Zd
S )UnconditionalCuttera  
    A modifier that unconditionally removes the first n or the last n bases from a read.

    If the length is positive, the bases are removed from the beginning of the read.
    If the length is negative, the bases are removed from the end of the read.
    lengthc                 C   s
   || _ d S r"   r   r%   r   r#   r#   r&   r9     s    zUnconditionalCutter.__init__c                 C   s   d| j  dS )NzUnconditionalCutter(length=r:   r   r<   r#   r#   r&   r=     s    zUnconditionalCutter.__repr__rj   c                 C   sX   | j dkr*|jd | j  |_|| j d  S | j dk rT|j| j d  |_|d | j  S d S Nr   )r   rd   
cut_prefix
cut_suffixr$   r#   r#   r&   r'     s    

zUnconditionalCutter.__call__N)	r(   r)   r*   r>   rs   r9   r=   r   r'   r#   r#   r#   r&   r     s   r   c                   @   s.   e Zd ZdZdd Zdd ZedddZd	S )
LengthTagModifierz5
    Replace "length=..." strings in read names.
    c                 C   s   t d| d | _|| _d S )Nz\bz[0-9]*\b)recompileregex
length_tag)r%   r   r#   r#   r&   r9     s    zLengthTagModifier.__init__c                 C   s   d| j  dS )NzLengthTagModifier(length_tag='rS   )r   r<   r#   r#   r&   r=     s    zLengthTagModifier.__repr__rj   c                 C   sD   |d d  }|j | jdkr@| j| jtt|j |j |_ |S r   )r   findr   r   subrt   rV   rd   r$   r#   r#   r&   r'     s     zLengthTagModifier.__call__Nr(   r)   r*   r>   r9   r=   r   r'   r#   r#   r#   r&   r     s   r   c                   @   s.   e Zd ZdZdd Zdd ZedddZd	S )
SuffixRemoverz0
    Remove a given suffix from read names.
    c                 C   s
   || _ d S r"   r[   )r%   r[   r#   r#   r&   r9   (  s    zSuffixRemover.__init__c                 C   s   d| j  dS )NzSuffixRemover('rS   r   r<   r#   r#   r&   r=   +  s    zSuffixRemover.__repr__rj   c                 C   s6   |d d  }|j | jr2|j d t| j  |_ |S r"   )r   endswithr[   rV   r$   r#   r#   r&   r'   .  s    zSuffixRemover.__call__Nr   r#   r#   r#   r&   r   #  s   r   c                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	PrefixSuffixAdderz1
    Add a suffix and a prefix to read names
    c                 C   s   || _ || _d S r"   rZ   r[   )r%   rZ   r[   r#   r#   r&   r9   :  s    zPrefixSuffixAdder.__init__c                 C   s   d| j  d| j dS )NzPrefixSuffixAdder(prefix='z', suffix='rS   r   r<   r#   r#   r&   r=   >  s    zPrefixSuffixAdder.__repr__c                 C   sL   |d d  }|j r |j d jjnd}| jd||j | jd| |_|S )Nr_   
no_adapterz{name})r^   rk   r   rZ   replacer[   )r%   r    r!   adapter_namer#   r#   r&   r'   A  s    zPrefixSuffixAdder.__call__N)r(   r)   r*   r>   r9   r=   r'   r#   r#   r#   r&   r   5  s   r   c                   @   s   e Zd ZdS )InvalidTemplateNr   r#   r#   r#   r&   r   L  s   r   c                   @   s   e Zd ZdZdddddddd	hZed
ddZdd Zdd Zdd Z	e
ee ee ddddZe
eeeef dddZeeedddZdS )Renamera0  
    Rename reads using a template

    The template string can contain the following placeholders:

    - {header} -- full, unchanged header
    - {id} -- the part of the header before the first whitespace
    - {comment} -- the part of the header after the ID, excluding initial whitespace
    - {cut_prefix} -- prefix removed by UnconditionalCutter (with positive length argument)
    - {cut_suffix} -- suffix removed by UnconditionalCutter (with negative length argument)
    - {adapter_name} -- name of the *last* adapter match or no_adapter if there was none
    - {match_sequence} -- the sequence that matched the adapter (this includes possible errors)
          or an empty string if there was no match
    - {rc} -- the string 'rc' if the read was reverse complemented (with --revcomp) or '' otherwise
    headeridcommentr   r   r   rcmatch_sequencetemplatec              
   C   sz   | dd}ztt|| _W n6 tk
rT } ztd| d| W 5 d }~X Y nX | | j| j || _| 	 | _
d S )N\t	Error in template '': )r   r   r   _tokensr   r   raise_if_invalid_variable	variables	_templatecompile_rename_function_renamer%   r   er#   r#   r&   r9   l  s    &zRenamer.__init__c                 C   s   | j j d| j dS )Nz('rS   )r   r(   r   r<   r#   r#   r&   r=   v  s    zRenamer.__repr__c                 C   s   t | jffS r"   )r   r   r<   r#   r#   r&   
__reduce__y  s    zRenamer.__reduce__c              	   C   s   ddddddddd	}t d
d | jD }dg}d|ks@d|krJ|d |d |D ] }|d| d||  d qX|d tdd| t }td|| |d S )z
        Create the function that computes a new name

        By creating the code dynamically, we can ensure that only those placeholder values are
        computed that are actually used in the template.
        z	read.nameid_r   z*info.cut_prefix if info.cut_prefix else ''z*info.cut_suffix if info.cut_suffix else ''z?info.matches[-1].adapter.name if info.matches else 'no_adapter'z'rc' if info.is_rc else ''z9info.matches[-1].match_sequence() if info.matches else '')r   r   r   r   r   r   r   r   c                 s   s   | ]}t |tr|jV  qd S r"   )
isinstancer   value)rJ   tokenr#   r#   r&   r     s    
 z2Renamer.compile_rename_function.<locals>.<genexpr>zdef rename(self, read, info):r   r   z+  id_, comment = self.parse_name(read.name)z  return self._template.format(z    =,z  )z%Generated code of rename function:
%s
rename)setr   rX   r   r   joindictexec)r%   codeZplaceholderslinesZplaceholder	namespacer#   r#   r&   r   |  s.    



zRenamer.compile_rename_functionN)tokensallowedr1   c                 C   s8   | D ].}t |tsq|j}||krtd| dqd S )NzError in template: Variable 'z' not recognized)r   r   r   r   )r   r   r   r   r#   r#   r&   r     s    

z!Renamer.raise_if_invalid_variable)	read_namer1   c                 C   s4   | j dd}t|dkr(|d |d fS | dfS dS )z0Parse read header and return (id, comment) tupler   )maxsplit   r    N)splitrV   )r   Zfieldsr#   r#   r&   
parse_name  s    zRenamer.parse_name)r    r!   r1   c                 C   s   |  | |||_|S r"   )r   r   r$   r#   r#   r&   r'     s    zRenamer.__call__)r(   r)   r*   r>   r   rt   r9   r=   r   r   rv   r   r   r   r   r   r   r   r   r'   r#   r#   r#   r&   r   P  s&   
 
r   c                   @   sp   e Zd ZdZedddZeee dddZe	e	e
e
ee	e	f dd	d
Ze	e	e
e
eeef dddZdS )PairedEndRenamerai  
    Rename paired-end reads using a template. The template is applied to both
    R1 and R2, and the same template variables as in the (single-end) renamer
    are allowed. However,
    these variables are evaluated separately for each read. For example, if `{comment}`
    is used, it gets replaced with the R1 comment in the R1 header, and with the R2
    comment in the R2 header.

    Additionally, all template variables except `id` can be used in the read-specific
    forms `{r1.variablename}` and `{r2.variablename}`. For example, `{r1.comment}`
    always gets replaced with the R1 comment, even in R2.
    r   c              
   C   sn   zt t|| _W n6 tk
rH } ztd| d| W 5 d }~X Y nX t| j|   |dd| _	d S )Nr   r   r   r   )
r   r   r   r   r   r   r   _get_allowed_variablesr   r   r   r#   r#   r&   r9     s    &zPairedEndRenamer.__init__)r1   c                  C   sF   t jdh dhB } t jddh D ] }| d|  | d|  q | S )Nr   rnr   zr1.zr2.)r   r   add)r   vr#   r#   r&   r     s
    z'PairedEndRenamer._get_allowed_variablesr,   c                 C   s   t |j|jsDt|jd }t|jd }td| d| d| ||||\}}t ||st|d }	t|d }
t|jd }td|	 d|
 d| d||_||_||fS )	Nr   r   zInput read IDs not identical: 'z' != ''z>After renaming R1 and R2, their IDs are no longer identical: 'z'. Original read ID: 'z'. )r   r   r   r   r8   r   r   )r%   r-   r.   r/   r0   id1id2name1name2Znew_id1Znew_id2r#   r#   r&   r'     s    
zPairedEndRenamer.__call__c                 C   s,  t |j\}}t |j\}}|j}	|j}
g }|||	|f|||
|ffD ]j\}}}}|jrx|jd jj}|jd  }nd}d}|t|||jr|jnd|j	r|j	nd||d qH| j
jf |dd|d tf |d tf |d d}| j
jf |d	d|d tf |d tf |d d}||fS )
Nr_   r   r   )r   r   r   r   r   r   r   )r   r   r   )r   r   r   )r   r   r   r^   rk   r   rX   r   r   r   r   r   r   )r%   r-   r.   r/   r0   r   Zcomment1r   Zcomment2Zheader1Zheader2dr   r   r   r!   r   r   r   r   r#   r#   r&   r     sP    




zPairedEndRenamer._renameN)r(   r)   r*   r>   rt   r9   rv   r   r   r   r   r   r'   r   r#   r#   r#   r&   r     s    	

r   c                   @   s0   e Zd ZdZdddZdd Zeddd	Zd
S )
ZeroCapperz:
    Change negative quality values of a read to zero
    !   c                 C   s4   || _ |}tdttt|t|| | _d S )Nr   )quality_basert   	maketransr   mapchrro   zero_cap_trans)r%   r   Zqbr#   r#   r&   r9   $  s    zZeroCapper.__init__c                 C   s   d| j  dS )NzZeroCapper(quality_base=r:   )r   r<   r#   r#   r&   r=   )  s    zZeroCapper.__repr__rj   c                 C   s    |d d  }|j | j|_ |S r"   )	qualities	translater   r$   r#   r#   r&   r'   ,  s    zZeroCapper.__call__N)r   r   r#   r#   r#   r&   r     s   
r   c                   @   s4   e Zd ZdeedddZdd Zeddd	Zd
S )NextseqQualityTrimmerr   cutoffbasec                 C   s   || _ || _d| _d S r   )r   r   trimmed_bases)r%   r   r   r#   r#   r&   r9   3  s    zNextseqQualityTrimmer.__init__c                 C   s   d| j  d| j dS )NzNextseqQualityTrimmer(cutoff=, base=r:   r   r<   r#   r#   r&   r=   8  s    zNextseqQualityTrimmer.__repr__rj   c                 C   s2   t || j| j}|  jt|| 7  _|d | S r"   )r   r   r   r   rV   )r%   r    r!   ra   r#   r#   r&   r'   ;  s    zNextseqQualityTrimmer.__call__N)r   r(   r)   r*   rs   r9   r=   r   r'   r#   r#   r#   r&   r   2  s   r   c                   @   s6   e Zd ZdeeedddZdd Zeddd	Zd
S )QualityTrimmerr   cutoff_frontcutoff_backr   c                 C   s   || _ || _|| _d| _d S r   )r  r  r   r   )r%   r  r  r   r#   r#   r&   r9   B  s    zQualityTrimmer.__init__c                 C   s   d| j  d| j d| j dS )NzQualityTrimmer(cutoff_front=z, cutoff_back=r   r:   r  r<   r#   r#   r&   r=   H  s    zQualityTrimmer.__repr__rj   c                 C   s@   t |j| j| j| j\}}|  jt|||  7  _||| S r"   )r   r   r  r  r   r   rV   )r%   r    r!   r`   ra   r#   r#   r&   r'   N  s       zQualityTrimmer.__call__N)r   r  r#   r#   r#   r&   r  A  s   r  c                   @   s2   e Zd ZdZdddZdd Zeeddd	Zd
S )PolyATrimmerz!Trim poly-A tails or poly-T headsFc                 C   s   t t| _|| _d S r"   )r   rs   r   revcomp)r%   r  r#   r#   r&   r9   Y  s    
zPolyATrimmer.__init__c                 C   s   dS )NzPolyATrimmer()r#   r<   r#   r#   r&   r=   ]  s    zPolyATrimmer.__repr__)recordr!   c                 C   sf   | j r2t|jdd}| j|  d7  < ||d  S t|j}| jt||   d7  < |d | S d S )NT)r  r   )r  r   rd   r   rV   )r%   r  r!   rD   r#   r#   r&   r'   `  s    
zPolyATrimmer.__call__N)F)	r(   r)   r*   r>   r9   r=   r   r   r'   r#   r#   r#   r&   r  V  s   
r  c                   @   s.   e Zd ZdZdd Zdd ZedddZd	S )
	ShortenerzUnconditionally shorten a read to the given length

    If the length is positive, the bases are removed from the end of the read.
    If the length is negative, the bases are removed from the beginning of the read.
    c                 C   s
   || _ d S r"   r   r   r#   r#   r&   r9   r  s    zShortener.__init__c                 C   s   d| j  dS )NzShortener(length=r:   r   r<   r#   r#   r&   r=   u  s    zShortener.__repr__rj   c                 C   s*   | j dkr|d | j  S || j d  S d S r   r   r$   r#   r#   r&   r'   x  s    
zShortener.__call__Nr   r#   r#   r#   r&   r	  k  s   r	  c                   @   s.   e Zd ZdZdd Zdd ZedddZd	S )
NEndTrimmerz(Trims Ns from the 3' and 5' end of readsc                 C   s   t d| _t d| _d S )Nz^N+zN+$)r   r   
start_trimend_trimr<   r#   r#   r&   r9     s    zNEndTrimmer.__init__c                 C   s   dS )NzNEndTrimmer()r#   r<   r#   r#   r&   r=     s    zNEndTrimmer.__repr__rj   c                 C   sN   |j }| j|}| j|}|r*| nd}|r:| nt|}||| S r   )rd   r  rn   r  searchendr`   rV   )r%   r    r!   rd   Z	start_cutZend_cutr#   r#   r&   r'     s    zNEndTrimmer.__call__Nr   r#   r#   r#   r&   r
    s   r
  )?r>   r   Zloggingcollectionsr   typesr   typingr   r   r   r   r   abcr	   r
   Zdnaior   r   Zqualtrimr   r   r   rA   r   r   r   r   r   r   r   Z	tokenizerr   r   r   r   r!   r   Z	getLoggerr   rW   r   r+   r3   r?   rw   r   	Exceptionr   r   r   r   r   r   r   r   r   r   r   r  r  r	  r
  r#   r#   r#   r&   <module>   sF   $	  0/a_ej