U
    H$xeJ                     @   s  d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
mZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZ eeZed	d
dZ eedddZ!eee	eef dddZ"G dd dZ#ee	eef  eee
f ee dddZ$eeeee
f ee dddZ%d%eeeee
f ee edddZ&eeee eeee
f edddZ'eee eeee
f ed d!d"Z(d#d$ Z)dS )&z
Parse adapter specifications
    N)Path)TypeOptionalListTupleAnyDictIterable)xopen)FastaReader   )AdapterFrontAdapterNonInternalFrontAdapterBackAdapterNonInternalBackAdapterAnywhereAdapterPrefixAdapterSuffixAdapterLinkedAdapterInvalidCharacterRightmostFrontAdapter)specc                 C   sz  ddddddddddddd}|  d}t }|D ]}| }|sDq2|d\}}}| }||krttd| d	|dkr|d
krtd| d	|| dk	r|| }q| }|d
krd}n*zt|}W n tk
r   t|}Y nX ||krtd| d|||< q2d|kr*d|kr*tdd|krFd|krFtdd|kr^d|d< |d= d|krvd|d< |d= |S )z/Parse key=value;key=value;key=value into a dictmax_error_rate
max_errorsmin_overlapN)eZ
error_rater   or   r   anywhererequiredoptionalindelsnoindels	rightmost;=zUnknown parameter '' zNo value given for key 'TzKey 'z' specified twicer    r   z>'optional' and 'required' cannot be specified at the same timer!   r"   z<'indels' and 'noindels' cannot be specified at the same timeF)splitdictstrip	partitionKeyError
ValueErrorintfloat)r   Zallowed_parametersfieldsresultZfieldkeyZequalsvalue r4   .lib/python3.8/site-packages/cutadapt/parser.pyparse_search_parameters   sd    





r6   )sequencereturnc                 C   s   d}d}t d| D ]}|dkr"q|dkrX|dkr:td|dkrJtd|}||7 }q|dkrt|}d|  kr|d	ksn td
| dqt|tr|dkrtd|dd |d |  }d}q|dkrtdd}qt|ts|dkrtd|S )z
    Replace all occurrences of ``x{n}`` (where x is any character) with n
    occurrences of x. Raise ValueError if the expression cannot be parsed.

    >>> expand_braces('TGA{5}CT')
    'TGAAAAACT'
    r'   Nz([{}]){z""{" must be used after a character}z"}" cannot be used herer   i'  Value z invalidz"}" expectedzExpected "{"zUnterminated expression)rer(   r-   r.   
isinstance)r7   r1   prevsr4   r4   r5   expand_bracesX   s6    	

rA   )spec1spec2r8   c                 C   sX   |dkrt d| s,|dkr"|}qPt dn$|sH|dkrBd}| }qP| }nt d||fS )Nr   z2No ellipsis ("...") allowed in "anywhere" adaptersbackzInvalid adapter specificationfrontzExpected either spec1 or spec2)r-   )rB   rC   adapter_typer   r4   r4   r5   _normalize_ellipsis   s    
rG   c                   @   s   e Zd ZdZee ee eeedddZdd Zdd Z	e
eeee ef d	d
dZeeed dddZe
eeee ee ef d	ddZe
dd Zdd ZdS )AdapterSpecificationa9  # noqa: E501
    Description of a single non-linked adapter.

    These are the attributes:

    - name (None or str)
    - restriction (None, 'anchored', or 'noninternal')
    - sequence (nucleotide sequence as string)
    - search parameters (dict with keys such as 'max_errors', 'min_overlap')
    - adapter_type ('front' for -a, 'back' for -g and 'anywhere' for -b)

    >>> AdapterSpecification.parse('a_name=ACGT;anywhere', 'back')
    AdapterSpecification(name='a_name', restriction=None, sequence='ACGT', parameters={'anywhere': True}, adapter_type='back')
    )namerestrictionr7   rF   r#   c                 C   s@   |dkst |dkst || _|| _|| _|| _|| _|| _d S )N)NanchorednoninternalrE   rD   r   )AssertionErrorrI   rJ   r7   
parametersrF   r#   )selfrI   rJ   r7   rO   rF   r#   r4   r4   r5   __init__   s    	zAdapterSpecification.__init__c                 C   s"   d | jj| j| j| j| j| jS )NzR{}(name={!r}, restriction={!r}, sequence={!r}, parameters={!r}, adapter_type={!r}))format	__class____name__rI   rJ   r7   rO   rF   rP   r4   r4   r5   __repr__   s    zAdapterSpecification.__repr__c                 C   s<   | j |j ko:| j|jko:| j|jko:| j|jko:| j|jkS N)rI   rJ   r7   rO   rF   )rP   otherr4   r4   r5   __eq__   s    



zAdapterSpecification.__eq__)r   r8   c                 C   s<   |  dd}d}t|dkr,|\}} | }|  } || fS )z_
        Parse an adapter specification given as 'name=adapt' into 'name' and 'adapt'.
        r%   r   N)r(   lenr*   )r   r0   rI   r4   r4   r5   _extract_name   s    z"AdapterSpecification._extract_name)r   rF   r8   c                 C   sz  |dkrt d|d\}}}| |\}}| }t|}t|}|dd}t|ddkrv| |d|i |dS z| |\}}	}W n t k
r   t d	dY nX |d
kr|	rt d|dkr|rt d|dk	r|}
n|	}
|dkr|
dk	rt dd|kr|
dkrt d|	ddt|krFt d|d  d| |rh|d
ks`|
dk	rht d| ||
||||S )af  
        Parse an adapter specification for a non-linked adapter (without '...')
        and return an AdapterSpecification instance.

        Allow:
        'back' and ADAPTER
        'back' and ADAPTERX
        'back' and ADAPTER$
        'front' and ADAPTER
        'front' and XADAPTER
        'front' and ^ADAPTER
        'anywhere' and ADAPTER
        rM   ,adapter_type must be front, back or anywherer$   r#   FXr   NzYou cannot use multiple placement restrictions for an adapter at the same time. Choose one of ^ADAPTER, ADAPTER$, XADAPTER or ADAPTERXrE   zIAllowed placement restrictions for a 5' adapter are XADAPTER and ^ADAPTERrD   zIAllowed placement restrictions for a 3' adapter are ADAPTERX and ADAPTER$r   zPPlacement restrictions (with X, ^, $) not supported for 'anywhere' (-b) adaptersr   rK   z~Setting 'min_overlap=' (or 'o=') for anchored adapters is not possible because anchored adapters always need to match in full.zmin_overlap=z exceeds length of adapter z1'rightmost' only allowed with regular 5' adapters)
r-   r+   r[   r*   r6   rA   poprZ   _parse_restrictionsget)clsr   rF   middleparameters_specrI   rO   r#   front_restrictionback_restrictionrJ   r4   r4   r5   parse   sX    zAdapterSpecification.parsec                 C   s   d }|  drd}| dd  } |   drJ|d k	r<tdd}| d} d }| drhd}| d d	 } |  dr|d k	rtd
d}| d} tt|tt| }|dkrtd|d ks|d kst||| fS )N^rK   r   r]   ztwo front restrictionsrL   ZxX$r<   ztwo back restrictionszfront and back restrictions)	
startswithupperr-   lstripendswithrstripr.   boolrN   )r   rd   re   Zn_placement_restrictionsr4   r4   r5   r_   #  s2    



z(AdapterSpecification._parse_restrictionsc                 C   s   | dkrR|r|dkst tS |dkr(tS |dkr4tS |dkr@tS td| dn^| dkr|dkrftS |dkrrtS |dkr~tS td| dn | d	kst |dkrt	S td
dS )zA
        restriction: None, "anchored", or "noninternal"
        rE   NrK   rL   r;   z$ for a front restriction not allowedrD   z# for a back restriction not allowedr   z5No placement may be specified for "anywhere" adapters)
rN   r   r   r   r   r-   r   r   r   r   )rF   rJ   r#   r4   r4   r5   _restriction_to_classA  s:    

z*AdapterSpecification._restriction_to_classc                 C   s   |  | j| j| jS rW   )ro   rF   rJ   r#   rU   r4   r4   r5   adapter_classh  s
      z"AdapterSpecification.adapter_classN)rT   
__module____qualname____doc__r   strrn   rQ   rV   rY   staticmethodr   r[   classmethodrf   r_   ro   rp   r4   r4   r4   r5   rH      s$   
	I$
&rH   )type_spec_pairssearch_parametersr8   c                 C   s(   g }| D ]\}}| t||| q|S )a  
    Create a list of Adapter classes from specification strings and adapter types.

    type_spec_pairs -- a list of (str, str) pairs, where the first is
      the adapter type (either 'front', 'back' or 'anywhere') and the second is the
      adapter specification string, such as "ACGT;o=3" or "file:adapters.fasta"

    search_parameters -- A dict with default search parameters. These can be overriden by the
      adapter specifications. They are passed as **kwargs when instantiating the
      adapter classes.
      Possible keys: max_error_rate, min_overlap, read_wildcards, adapter_wildcards, indels

    Return a list of appropriate Adapter instances.
    )extend$make_adapters_from_one_specification)rw   rx   adaptersrF   r   r4   r4   r5   !make_adapters_from_specificationsn  s    
r|   )r   rF   rx   r8   c              
   c   s2  |  ds|  ds|  drd}d}|  drB| dd } d}n|  dr`d| dd  } d	}| d
d d\}}}| }|t| t|D ]"\}	} t||  | |||	dV  qnrzt| ||V  W n^ tk
r, }
 z>t| 	 rd|  d|  d}t|
j
d d | n W 5 d}
~
X Y nX dS )zO
    Parse an adapter specification and yield appropriate Adapter classes.
    zfile:z^file:zfile$:r'   rg   r   N   rh      r$   )rI   zA file exists named 'zf'. To use the sequences in that file as adapter sequences, write 'file:' before the path, as in 'file:z'.r   
)ri   r+   copyupdater6   read_adapters_fastamake_adapterr   r   existsargs)r   rF   rx   Zanchoring_prefixZanchoring_suffixpath_rc   rO   rI   r   Zextra_messager4   r4   r5   rz     s@    	


rz   )r   rF   rx   rI   r8   c                 C   sl   |dkrt d| d\}}}|dkr@|r@|r@t|||||S |dkrZt|||\} }n|} t| |||S )a  
    Parse an adapter specification not using ``file:`` notation and return
    an object of an appropriate Adapter class.

    name -- Adapter name if not included as part of the spec. (If spec is
    'name=ADAPTER', name will be 'name'.)

    adapter_type -- describes which commandline parameter was used (``-a``
    is 'back', ``-b`` is 'anywhere', and ``-g`` is 'front').

    search_parameters -- dict with default search parameters
    rM   r\   z...)r-   r+   _make_linked_adapterrG   _make_not_linked_adapter)r   rF   rx   rI   rB   rb   rC   r4   r4   r5   r     s    r   )rB   rC   rI   rF   rx   r8   c                 C   s   |dkrt dt| d}t|d}|dkr6|j}|jdk	}|jdk	}| }	|	|j | }
|
|j |dkrd}d}n|}|}|	d|}|
d|}|	 |j
fdd	i|	}|	 |j
fdd
i|
}t|||||dS )z6Return a linked adapter from two specification stringsr   z*'anywhere' (-b) adapters may not be linkedrE   rD   NTr   rI   Zlinked_frontZlinked_back)front_adapterback_adapterfront_requiredback_requiredrI   )r-   rH   rf   rI   rJ   r   r   rO   r^   rp   r7   r   )rB   rC   rI   rF   rx   Z
front_specZ	back_specZfront_anchoredZback_anchoredZfront_parametersZback_parametersr   r   r   r   r4   r4   r5   r     sP    	

r   )r   rI   rF   rx   r8   c                 C   s   t | |}| }|jddr:|tttfkr:d|jd< d|jkrLtd|	 }|
|j |f |j|d krv|jn|d|S )Nr   FTZforce_anywherer   zA'required' and 'optional' can only be used within linked adapters)r7   rI   )rH   rf   rp   rO   r^   r   r   r   r-   r   r   r7   rI   )r   rI   rF   rx   Zaspecrp   rO   r4   r4   r5   r   
  s(    

r   c              	   c   sZ   t | dddB}t|}|D ].}|jdd}|r:|d nd}||jfV  qW 5 Q R X dS )z2
    Read adapter sequences from a FASTA file
    rbr   )modeZthreadsNr   )r
   r   rI   r(   r7   )r   fZfastarecordheaderrI   r4   r4   r5   r   &  s    r   )N)*rs   r=   Zloggingpathlibr   typingr   r   r   r   r   r   r	   r
   Zdnaio.readersr   r{   r   r   r   r   r   r   r   r   r   r   r   Z	getLoggerrT   Zloggerrt   r6   rA   rG   rH   r|   rz   r   r   r   r   r4   r4   r4   r5   <module>   sX   $4
=( Y

2 
 
6
