
    Xf*                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZ ddl
mZ dd	lmZ d
dlmZ d Zd Zd Zd ZddZd Zd ZeeeedZd ZddZddZ G d de          ZdS )zBio.SeqIO support for the SnapGene file format.

The SnapGene binary format is the native format used by the SnapGene program
from GSL Biotech LLC.
    )datetimesub)unpack)parseString)Seq)
SeqFeature)SimpleLocation)	SeqRecord   )SequenceIteratorc              #     K   	 |                      d          }t          |          dk     rdS t          d|          d         }|                      d          }t          |          dk     rt          d          t          d|          d         }|                      |          }t          |          |k     rt          d          |||fV  )	a?  Iterate over the packets of a SnapGene file.

    A SnapGene file is made of packets, each packet being a TLV-like
    structure comprising:

      - 1 single byte indicating the packet's type;
      - 1 big-endian long integer (4 bytes) indicating the length of the
        packet's data;
      - the actual data.
    Tr   Nz>Br      zUnexpected end of packetz>I)readlenr   
ValueError)handlepacket_typelengthdatas       4lib/python3.11/site-packages/Bio/SeqIO/SnapGeneIO.py_iterater      s      *kk!nn{aFT;//2Qv;;??7888f%%a({{6""t99v7888FD))))*    c                     |j         rt          d          t          d| dz
  z  |          \  }}t          |                    d                    |_         d|j        d<   |dz  rd|j        d<   d
S d	|j        d<   d
S )z}Parse a DNA sequence packet.

    A DNA sequence packet contains a single byte flag followed by the
    sequence itself.
    z*The file contains more than one DNA packetz>B%dsr   ASCIIDNAmolecule_typecirculartopologylinearN)seqr   r   r   decodeannotations)r   r   recordflagssequences        r   _parse_dna_packetr'   7   s     z GEFFFW
3T::OE8X__W--..FJ*/F't| 2)3:&&&)1:&&&r   c                    t          |                    d                    }t          |d          }|dk    rd|j        d<   n
d|j        d<   t          |d          }|rt	          j        |d          |j        d	<   t          |d
          }|r||_        t          |d          }|r8|                    dd          d         |_        ||_	        |s|j        |_        dS dS dS )zParse a 'Notes' packet.

    This type of packet contains some metadata about the sequence. They
    are stored as a XML string with a 'Notes' root node.
    UTF-8Type	SyntheticSYNdata_file_divisionUNCLastModifiedz%Y.%m.%ddateAccessionNumberComments r   r   N)
r   r"   _get_child_valuer#   r   strptimeidsplitnamedescription)r   r   r$   xmltyper0   acccomments           r   _parse_notes_packetr>   I   s	    dkk'**
+
+CC((D{38/0038/0C00D I%-%6tZ%H%H6"
3 1
2
2C
 	sJ//G $mmC++A.$ 	$FIII	$ $	$ 	$r   c                     t          d|          \  }}}}|                    d          dk    rt          d          dS )zParse a SnapGene cookie packet.

    Every SnapGene file starts with a packet of this type. It acts as
    a magic cookie identifying the file as a SnapGene file.
    z>8sHHHr   SnapGenez%The file is not a valid SnapGene fileN)r   r"   r   )r   r   r$   cookieseq_typeexp_versionimp_versions          r   _parse_cookie_packetrE   f   sJ     28$1G1G.FHk;}}W++@AAA ,+r   Fc                 
   d |                      d          D             \  }}|dz
  }|r
|dz  }|dz  }||k    r7t          |t          |          |          }t          d||          }||z   }nt          |||          }|S )Nc              3   4   K   | ]}t          |          V  d S N)int).0xs     r   	<genexpr>z"_parse_location.<locals>.<genexpr>r   s(      77Q#a&&777777r   -r   )strandr   )r7   r
   r   )		rangespecrN   r$   	is_primerstartendl1l2locations	            r   _parse_locationrV   q   s    77)//#"6"6777JE3AIE  	
qs{{E3v;;v>>>As62227!%V<<<Or   c           	         t          |                    d                    }|                    d          D ]}i }t          |dd          }d}t	          t          |dd                    }|d	k    rd
}d}	g }
d|                    d          D ]}}t          |dd          dk    rt          |d          }dz  t          |||          }|	s|}	n|d
k    r||	z   }	n|	|z   }	t          |d          }|r|
                    |g           ~t          |
          dk    rD|d
k    rt          fd|
D                       }
d	                    d |
D                       g|d<   |	st          d          |                    d          D ]}t          |dd          }g }|                    d          D ]}|                    d          r3|                    t          |j        d         j                             J|                    d          r3|                    t          |j        d         j                             |                    d          r2|                    t	          |j        d         j                             |||<   t          |d          }|rd|vr|g|d<   n||d         vr|g|d<   t          |	||          }|j                            |           dS ) zParse a sequence features packet.

    This packet stores sequence features (except primer binding sites,
    which are in a dedicated Primers packet). The data is a XML string
    starting with a 'Features' root node.
    r)   Featurer;   misc_featuredefaultr   directionality1   Nr   Segmentstandardgapranger8   c                 *    g | ]\  }}|z
  d z   |gS )r    )rJ   ir8   n_partss      r   
<listcomp>z*_parse_features_packet.<locals>.<listcomp>   s*    $U$U$UDgkAot%<$U$U$Ur   ;c              3   *   K   | ]\  }}| d | V  dS ):Nre   )rJ   rf   r8   s      r   rL   z)_parse_features_packet.<locals>.<genexpr>   s0      &M&MD!}}d}}&M&M&M&M&M&Mr   partszMissing feature locationQzMissing qualifier nameerrorVtextpredefrI   labelr;   
qualifiers)r   r"   getElementsByTagName_get_attribute_valuerI   rV   appendr   reversedjoinr   hasAttribute_decode
attributesvaluer	   features)r   r   r$   r:   featurequalsr;   rN   r\   rU   subpartssegmentrngnext_locationr8   	qualifierqnameqvaluesr~   rg   s                      @r   _parse_features_packetr      s~    dkk'**
+
+C++I66 D( D(#GV^LLL *:CHHH
 
 QF33I>> 	1 	1G#GVZ@@EII&w88CqLG+C@@M 4(2(83#m3'88D 1$000x==1||#$U$U$U$UH$U$U$UVV!hh&M&MH&M&M&MMMNE'N 	97888 55c:: 	# 	#I(6)A  E G"77<< G G%%f-- GNN75+;F+C+I#J#JKKKK''11 GNN75+;H+E+K#L#LMMMM''.. GNN3u'7'>'D#E#EFFF"E%LL#GV44 	'e##"&gU7^++ "&fXDUCCCw''''ID( D(r   c           	      V   t          |                    d                    }|                    d          D ]}i }t          |d          }|r|g|d<   g }|                    d          D ]}t          |dd          }	t	          t          |d	d
                    }
|
dk    rd}
nd}
t          |	|
|d          }t	          t          |dd
                    dk    }|r||v r{|                    |           t          |d|          }|j                            |           dS )zParse a Primers packet.

    A Primers packet is similar to a Features packet but specifically
    stores primer binding features. The data is a XML string starting
    with a 'Primers' root node.
    r)   Primerr8   rs   BindingSiterU   zMissing binding site locationrn   boundStrand0rZ   r   r_   T)rP   
simplifiedprimer_bindrt   N)	r   r"   rv   rw   rI   rV   rx   r	   r   )r   r   r$   r:   primerr   r8   	locationssiter   rN   rU   r   r   s                 r   _parse_primers_packetr      s|    dkk'**
+
+C**844 , ,#FF33 	$"VE'N	//>> 	, 	,D&j(G  C -dM3OOOPPF{{&sFFdKKKH1$cRRRSSWXXJ h)33X&&& "   G
 O""7++++-	,, ,r   )r         
   c                 $    t          dd|           S )Nz<[^>]+> r   )rq   s    r   r|   r|     s    y"d###r   Nc                     |                      |          rt          | j        |         j                  S |rt	          |          |S rH   )r{   r|   r}   r~   r   )noder8   r[   ro   s       r   rw   rw     sK     tt,2333	 r   c                     |                      |          }|rG|d         j        r:|d         j        j        | j        k    rt          |d         j        j                  S |rt          |          |S )Nr   )rv   
childNodes
firstChildnodeType	TEXT_NODEr|   r   r   )r   r8   r[   ro   childrens        r   r4   r4     sx    ((..H	QK"	 QK"+t~==x{-2333	 r   c                   .     e Zd ZdZ fdZd Zd Z xZS )SnapGeneIteratorzParser for SnapGene files.c                 P    t                                          |dd           dS )a   Parse a SnapGene file and return a SeqRecord object.

        Argument source is a file-like object or a path to a file.

        Note that a SnapGene file can only contain one sequence, so this
        iterator will always return a single record.
        br@   )modefmtN)super__init__)selfsource	__class__s     r   r   zSnapGeneIterator.__init__&  s*     	cz:::::r   c                 0    |                      |          }|S )z9Start parsing the file, and return a SeqRecord generator.)iterate)r   r   recordss      r   parsezSnapGeneIterator.parse0  s    ,,v&&r   c              #     K   t          d          }t          |          }	 t          |          \  }}}n# t          $ r t	          d          dw xY w|dk    rt	          d          t          |||           |D ]/\  }}}t                              |          }| ||||           0|j        st	          d          |V  dS )z.Iterate over the records in the SnapGene file.NzEmpty file.	   z5The file does not start with a SnapGene cookie packetzNo DNA packet in file)	r   r   nextStopIterationr   rE   _packet_handlersgetr!   )r   r   r$   packetsr   r   r   handlers           r   r   zSnapGeneIterator.iterate5  s      46""	6(,W%K 	6 	6 	6]++5	6 $TUUUVT6222)0 	. 	.%K&**;77G"f---z 	64555s	   6 A)__name__
__module____qualname____doc__r   r   r   __classcell__)r   s   @r   r   r   #  s\        $$; ; ; ; ;  
      r   r   )F)NN)r   r   rer   structr   xml.dom.minidomr   Bio.Seqr   Bio.SeqFeaturer	   r
   Bio.SeqRecordr   
Interfacesr   r   r'   r>   rE   rV   r   r   r   r|   rw   r4   r   re   r   r   <module>r      s                      ' ' ' ' ' '       % % % % % % ) ) ) ) ) ) # # # # # # ( ( ( ( ( (* * *:2 2 2$$ $ $:B B B   &L( L( L(^&, &, &,T 


 	  $ $ $
      ' ' ' ' '' ' ' ' ' 'r   