a
    ŽÞd   ã                   @   sL   d dl Z d dlZdd„ Zdd„ Zdd„ Zdd	„ Zd
d„ Zdd„ Zdd„ ZdS )é    Nc                 C   s,   | j r| jn| j}| j r| jn| j}||fS )z3 return external coordinates of aligned read bases )Ú
is_reverseZaendÚpos)ÚreadZfivepZthreep© r   ú.lib/python3.9/site-packages/mapdamage/align.pyÚget_coordinates   s    r   c                 C   s\   t | ƒ}t| ƒ}td|| ƒ}t || || ƒ}| |||¡ ¡ }	| |||¡ ¡ }
|	|
fS )zk return reference sequences before and after the read
  check for extremities and return what is available r   )ÚminÚmaxZfetchÚupper)ZcoordZchromZ
reflengthsÚlengthÚrefZ	coord_minZ	coord_maxZ
pos_beforeZ	pos_afterÚbeforeZafterr   r   r   Ú
get_around   s    r   c                 C   sp   t |ƒ}t| dƒD ]\}}dg| |||…< qt |ƒ}t| dƒD ]\}}dg| |||…< q@d |¡d |¡fS )zš insert gaps according to the cigar string
  deletion: gaps to be inserted into read sequences,
  insertions: gaps to be inserted into reference sequence é   ú-é   Ú )ÚlistÚparse_cigarÚjoin)Ú	cigarlistÚseqr   ÚlrefÚnbrÚidxÚlreadr   r   r   Úalign)   s    r   c                 C   sÐ   t |ƒ}t| dƒD ]\}}dg| |||…< qt |ƒ}t |ƒ}	t| dƒD ],\}}dg| |||…< dg| |	||…< qHt|	ƒD ]4\}}
t|
ƒd |k r~|| dkr~d||< d||< q~d |¡d |	¡d |¡fS )z¨ insert gaps according to the cigar string
  deletion: gaps to be inserted into read sequences and qualities,
  insertions: gaps to be inserted into reference sequence r   r   r   é!   ÚNr   )r   r   Ú	enumerateÚordr   )r   r   ZqualZ	thresholdr   r   r   r   r   ZlqualZscorer   r   r   Úalign_with_qual8   s    
r!   c                 C   s’   | j r
dnd}|| | | }tt|ƒ||ƒD ]^\}	}
}|
dv r.|dv r.|dkrd|| |	  d7  < ||
kr.d||
f }|| |	  d7  < q.dS )z~ count mismatches using aligned reference and read,
  must be redone since N in reference were randomly replaced by any bases r   ú+zACGT-r   z%s>%sN)r   ÚzipÚrange)r   r   Zrefseqr   r   ÚtabÚendÚstdZsubtableÚiZnt_seqZnt_refZmutr   r   r   Úget_misO   s    r)   c                 C   sD   d}g }d}| D ].\}}||kr.|  ||g¡ ||v r||7 }q|S )z| for a specific operation (mismach, match, insertion, deletion... see above)
  return occurences and index in the alignment r   )r   r   r   é   é   )Úappend)r   ZopeZtlengthZ
coordinateZoplistZ	operationr   r   r   r   r   a   s    
r   c                    sl   ‡ ‡fdd„}| j rdnd}tj | jd¡D ]:\}}|dkrL| j rFdnd}n| j rVdnd}||||ƒ q,d	S )
z* record soft clipped bases at extremities c                    s6   t dt|ˆ ƒƒD ] }ˆ|  | d |  d7  < qd S )Nr   ÚSr   )r$   r   )r&   r'   Úbasesr(   ©r   r%   r   r   Úupdate_tabler   s    z*record_soft_clipping.<locals>.update_tabler   r"   é   r   Z3pZ5pN)r   Ú	mapdamager   r   Zcigar)r   r%   r   r0   ZstrandZnbasesr   r&   r   r/   r   Úrecord_soft_clippingp   s    r3   )	Ú	itertoolsr2   r   r   r   r!   r)   r   r3   r   r   r   r   Ú<module>   s   