
    VA(f"                     ^   d Z ddlZddlZ ej        dd          Z ej        dd          Zd Zi ddd	d	d
d
dddddd	dd
ddddddddddddddddddd d!d"d#d$d%d&Zi ddd	dd
dddddddddddddddddd!dd"d'd#d'd$d(d%d(Zd) Z	d* Z
d+ Zd, Zd- ZdS ).z>Tools for working with files in the samtools pileup -c format.    NPileupSubstitution)

chromosomeposreference_basegenotypeconsensus_qualitysnp_qualitymapping_qualitycoverage
read_basesbase_qualitiesPileupIndel)r   r   r   r   r   r	   r
   r   first_allelesecond_allelereads_firstreads_second
reads_diffc              #     K   t           d t           t           t          t          t          t          t           t           f
}t           d t           t           t          t          t          t          t           t           t          t          t          f}| D ]}|dd                                         }|d         dk    rJ	 t          d t	          ||          D              V  O# t
          $ r t          j        d|z            w xY w	 t          d	 t	          ||          D              V  # t
          $ r t          j        d|z            w xY wdS )
a/  iterate over ``samtools pileup -c`` formatted file.

    *infile* can be any iterator over a lines.

    The function yields named tuples of the type :class:`pysam.Pileup.PileupSubstitution`
    or :class:`pysam.Pileup.PileupIndel`.

    .. note::

       The parser converts to 0-based coordinates
    c                 &    t          |           dz
  S N   intxs    ,lib/python3.11/site-packages/pysam/Pileup.py<lambda>ziterate.<locals>.<lambda>0       Q!     c                 &    t          |           dz
  S r   r   r   s    r   r   ziterate.<locals>.<lambda>2   r   r   N   *c                 *    g | ]\  }} ||          S  r%   .0r   ys      r   
<listcomp>ziterate.<locals>.<listcomp>9   s$    #H#H#HTQAAaDD#H#H#Hr   zparsing error in line: `%s`c                 *    g | ]\  }} ||          S r%   r%   r&   s      r   r)   ziterate.<locals>.<listcomp>>   s$    *O*O*ODAq11Q44*O*O*Or   )	strr   splitr   zip	TypeErrorpysamSamtoolsErrorr   )infile
conv_subst
conv_indellineds        r   iterater6   #   se      ++SsCc35J++S#ssCc3S:J  P P"IOOQ43;;P!#H#HSQ5G5G#H#H#HIIIII P P P)*G$*NOOOPP(*O*OC
A<N<N*O*O*OPPPPP P P P)*G$*NOOOPP Ps   6#C"C< #D$$"EACGTAACCGGTTUUUAGrGARCTr(   TCYACmCAMGTkTGKsSwW)CGGCATTArT   rV   c                 @    t           |                                          S )zencode genotypes like GG, GA into a one-letter code.
    The returned code is lower case if code[0] < code[1], otherwise
    it is uppercase.
    )ENCODE_GENOTYPEuppercodes    r   encodeGenotyper]   ^   s    
 4::<<((r   c                     t           |          S )z|decode single letter genotypes like m, M into two letters.
    This is the reverse operation to :meth:`encodeGenotype`.
    )DECODE_GENOTYPEr[   s    r   decodeGenotyper`   f   s     4  r   c                   	
 d 	d 
	
fd}g g }}d}| D ]T}	  |||          \  }}n# t           $ r Y  n7w xY w|                    |           |dk    r|                    |           Ud}|rt                      t          t          |                    dk    s
J d            |d	         }d
                    |          }||fS )z*translate indel from vcf to pileup format.c                     t          t          |           t          |                    }t          |          D ] }| |         ||         k    r| d|         c S !| d|         S )z'get common prefix of strings s1 and s2.Nminlenranges1s2nr   s       r   	getPrefixz0translateIndelGenotypeFromVCF.<locals>.getPrefixq   sf    BR!!q 	 	A!u1~~"1"v "1"vr   c                    t          t          |           t          |                    }| d         |d         k    rdS t          d| dz
  d          D ]#}| |         ||         k    r| |dz   d         c S $| | d         S )z&get common sufix of strings s1 and s2.r!    r   Nrc   rg   s       r   	getSuffixz0translateIndelGenotypeFromVCF.<locals>.getSuffixy   s    BR!!b6RV2rA262&& 	" 	"A!u1~~!a%&&z!!! 1"##wr   c                    | |k    rdS t          |          t          |           k    r|                    |           r,d|t          |           d          z  t          |           dz
  fS |                    |           rd|d t          |                     z  dfS  ||           } ||           }t          |          t          |          z   t          |           z
  }|dk     rt                      d|t          |          t          |          |z
            z  t          |          dz
  fS t          |          t          |           k     r|                     |          r,d| t          |          d          z  t          |          dz
  fS |                     |          rd| d t          |                   z  dfS  ||           } ||           }t          |          t          |          z   t          |          z
  }|dk     rt                      d| t          |          t          |          |z
            z  t          |          fS J d            )N)r#   r   z-%sr   r!   r   z+%szsnp?)re   
startswithendswith
ValueError)variantrefprefixsuffixsharedrk   ro   s        r   getGenotypez2translateIndelGenotypeFromVCF.<locals>.getGenotype   sQ   c>>6s88c'll""~~g&& Ys3w<<==113w<<!3CCCg&& 	Ys>S\\M>22B66"300"300Vs6{{2S\\AA::$,,&s3v;;Vv1E/F#FGGVWXXXXXG$$!!#&& Yws3xxyy113s88a<??!!#&& 	YwyCy11144"300"300Vs6{{2SXX=A::$,,&ws6{{S[[65I3J'JKKSQW[[XXf1r   Tr#   Fr   zmultiple offsets for indelr   /)rs   appendre   setjoin)vcf_genotypesru   ry   	genotypesoffsetsis_errorrt   goffsetrk   ro   s            @@r   translateIndelGenotypeFromVCFr   m   s6       # # # # # #R RwIH   	#GS11IAvv 	 	 	EE	 	88NN6"""  lls7||!!!#?!!!QZF##Ifs   *
88c                 &   | j         }| j        }| j        }|g| j        z   | |         }|d         }t	          |          dk    rt          dt          |           z            |d         }|d         dk    rdS fd|D             }|                    ddg          d         x}}| j                            d	dg          d         }	|                    d
d          }
t	          |          dk    s"t          d | j        D                       dk    rAt          ||          \  }}t          |||z   d||||	|
|dt	          |          z  ddd          S t          d                    |                    }d}d}t          |||||||	|
||
  
        S )z$convert vcf record to pileup record.rL   r   z%only single genotype per position, %sr   .Nc                 F    g | ]}|d k    t          |                   S )rz   r   )r'   r   allelless     r   r)   zvcf2pileup.<locals>.<listcomp>   s)    AAAaS#a&&!r   GQMQDPc                 ,    g | ]}t          |          S r%   )re   )r'   r   s     r   r)   zvcf2pileup.<locals>.<listcomp>   s    !:!:!:Q#a&&!:!:!:r   r#   <rm   )contigr   ru   altre   rs   r+   getinfomaxr   r   r]   r}   r   )vcfsampler   r   	referencedatar   r	   r   r
   r   r   r   r   r   r   s                  @r   
vcf2pileupr      s    J
'CI{SW$Hv;D T
I
9~~@CHHMNNN!I |stAAAA9AAAI '+hhtaS&9&9!&<<K#hll4!--a0Oxxa  H
9~~S!:!:#'!:!:!:;;a??8INN&:<#,&*##X.  	 ""'')"4"455
!*c9"*,="-"*J"0	2 2 	2r   c              #      K   t          j                    }|                    |            ||                                vrt	          d          |                                D ]}t          ||          }|r|V  dS )a  iterate over a vcf-formatted file.

    *infile* can be any iterator over a lines.

    The function yields named tuples of the type
    :class:`pysam.Pileup.PileupSubstitution` or
    :class:`pysam.Pileup.PileupIndel`.

    Positions without a snp will be skipped.

    This method is wasteful and written to support same legacy code
    that expects samtools pileup output.

    Better use the vcf parser directly.

    zsample %s not vcf fileN)r/   VCFconnect
getsamplesKeyErrorfetchr   )r1   r   r   rowresults        r   iterate_from_vcfr      s      " )++CKKS^^%%%%/000yy{{  C(( 	LLL r   )__doc__collectionsr/   
namedtupler   r   r6   rY   r_   r]   r`   r   r   r   r%   r   r   <module>r      si   D D     +[+,@
-BC C  %k$]&78 8"P P P@		3	S	"%s	#	S		%)3	04c	 	#	 S	 	#		 S		
 	#	
 S	 	#	 S	 S
S	 	 	  	
 
 D  D  D  D  D  D ) ) )! ! !V V Vr72 72 72t    r   