
    Xf3U                         d Z ddlmZ ddlmZ ddlmZ ddlmZ ddlm	Z	 ddlm
Z
  G d d	e          Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z G d de
          ZdS )a  Implementations of Biopython-like Seq objects on top of BioSQL.

This allows retrieval of items stored in a BioSQL database using
a biopython-like SeqRecord and Seq interface.

Note: Currently we do not support recording per-letter-annotations
(like quality scores) in BioSQL.
    )Optional)
SeqFeature)Seq)SequenceDataAbstractBaseClass)_RestrictedDict)	SeqRecordc                   4     e Zd ZdZdZd fd	Zd Zd Z xZS )_BioSQLSequenceDataz9Retrieves sequence data from a BioSQL database (PRIVATE).)
primary_idadaptor_lengthstartr   c                     || _         || _        || _        || _        t	                                                       dS )aU  Create a new _BioSQLSequenceData object referring to a BioSQL entry.

        You wouldn't normally create a _BioSQLSequenceData object yourself,
        this is done for you when retrieving a DBSeqRecord object from the
        database, which creates a Seq object using a _BioSQLSequenceData
        instance as the data provider.
        N)r   r   r   r   super__init__)selfr   r   r   length	__class__s        -lib/python3.11/site-packages/BioSQL/BioSeq.pyr   z_BioSQLSequenceData.__init__#   s=     %
    c                     | j         S )z"Return the length of the sequence.)r   r   s    r   __len__z_BioSQLSequenceData.__len__1   s
    |r   c                 d   t          |t                    rE|                    | j                  \  }}}t	          t          |||                    }|dk    rdS n|}|dk     r || j        z  }|dk     rt          |          n|| j        k    rt          |          | j                            | j	        | j
        |z   | j
        |z   dz             }t          |          S |dk    r}|dk    rS|| j        k    rH| j                            | j	        | j
        | j
        | j        z             }|                    d          S t          | j	        | j        | j
        |z   |          S | j                            | j	        | j
        |z   | j
        |z             }	|	dd|                             d          S )z@Return a subsequence as a bytes or a _BioSQLSequenceData object.r   r      ASCIIN)
isinstancesliceindicesr   lenrange
IndexErrorr   get_subseq_as_stringr   r   ordencoder
   )
r   keyr   endstepsizeicsequencefulls
             r   __getitem__z_BioSQLSequenceData.__getitem__5   s   c5!! 	"{{4<88E3uUC..//Dqyys  A1uuT\!q55$S//) dl"" oo%11aa!1C A q66M199zzddl22<<<OTZdl1J   w/// +OT\4:3Et  
 <44e!3TZ#5E D $<&&w///r   )r   r   )	__name__
__module____qualname____doc__	__slots__r   r   r.   __classcell__)r   s   @r   r
   r
      sf        CC=I       &0 &0 &0 &0 &0 &0 &0r   r
   c                     |                      d|f          }|sd S t          |          dk    r t          dt          |           d          |d         \  }t          |          S )Nz5SELECT length FROM biosequence WHERE bioentry_id = %sr   Expected 1 response, got .r   )execute_and_fetchallr    
ValueErrorint)r   r   seqsgiven_lengths       r   _retrieve_seq_lenr=   ^   su    ''?* D  t
4yyA~~ASYYAAABBB1gO\|r   c                    |                      d|f          }|sd S t          |          dk    r t          dt          |           d          |d         \  }}}	 t          |          }t          |          }||k    rt          d| d|           d}n# t          $ r |t          d	| d          |                      d
|f          }t          |          dk    r t          dt          |           d          |d         \  }}}|rt          d| d          t          |          }d}~Y nw xY w~|r"t          || d|          }t          |          S t          d |          S )NzLSELECT alphabet, length, length(seq) FROM biosequence WHERE bioentry_id = %sr   r6   r7   r   z''length' differs from sequence length, z, Tz$Expected 'length' to be 'None', got zDSELECT alphabet, length, seq FROM biosequence WHERE bioentry_id = %sz*Expected 'seq' to have a falsy value, got F)r   r   r   )r8   r    r9   r:   	TypeErrorr
   r   )	r   r   r;   moltyper<   r   have_seqseqdatas	            r   _retrieve_seqrE   k   s    ''V	 D  
4yyA~~ASYYAAABBB$(G!G\6V<((\!!R,RR&RR      MFMMMNNN++RM
 
 t99>>ETEEEFFF%)!W"s 	RP#PPPQQQ\""CC 	 (":waOOO4yy4''''s   ;B BD76D7c                     g }|                      d|f          }|D ]2\  }}}|r|dk    r| d| }n|}|                    | d|            3|S )zBRetrieve the database cross references for the sequence (PRIVATE).z{SELECT dbname, accession, version FROM bioentry_dbxref join dbxref using (dbxref_id) WHERE bioentry_id = %s ORDER BY "rank"0r7   :)r8   append)r   r   _dbxrefsdbxrefsdbname	accessionversionvs           r   _retrieve_dbxrefsrP      s    H**	 
 G '. ) )"	7 	w#~~((w((AAA6A((((Or   c                 r   d}|                      ||f          }g }|D ]\  }}}|                      d|f          }i }	|D ].\  }
}|	                    |
g                               |           /|                      d|f          }|D ]5\  }
}|
 d| }|	                    dg                               |           6|                      d|f          }g }|D ]\  }}}}|r|dz  }|dk    rd }|d	vrt          d
|d|          |.|,||k     r&dd l}ddlm} |                    d|||fz  |           |t          j	                    }|t          j	                    }|                    ||||f           |                      d|f          }i }|D ](\  }}}}|r|dk    r| d| }n|}|dk    rd }||f||<   )t          j        |          }||_
        |	|_        t          |          dk    rn(t          |          dk    rz|d         \  }}}}t          | |          |_        |                    |d          \  }}t          j        ||          |_        ||j        _        ||j        _        ||j        _        ng }|D ]N}|\  }}}}|                    |d          \  }}|                    t          j        |||||                     Od |D             }t          |          dk    rd|v r|d d d         }t          j        |d          |_        |                    |           |S )NzSELECT seqfeature_id, type.name, "rank" FROM seqfeature join term type on (type_term_id = type.term_id) WHERE bioentry_id = %s ORDER BY "rank"zvSELECT name, value FROM seqfeature_qualifier_value  join term using (term_id) WHERE seqfeature_id = %s ORDER BY "rank"zSELECT dbxref.dbname, dbxref.accession FROM dbxref join seqfeature_dbxref using (dbxref_id) WHERE seqfeature_dbxref.seqfeature_id = %s ORDER BY "rank"rH   db_xrefzeSELECT location_id, start_pos, end_pos, strand FROM location WHERE seqfeature_id = %s ORDER BY "rank"r   r   )r   NzInvalid strand z% found in database for seqfeature_id )BiopythonWarningz<Inverted location start/end (%i and %i) for seqfeature_id %szsSELECT location_id, dbname, accession, version FROM location join dbxref using (dbxref_id) WHERE seqfeature_id = %srG   r7    )type)NN)strandrefref_dbc                     h | ]	}|j         
S  )rW   ).0_s     r   	<setcomp>z%_retrieve_features.<locals>.<setcomp>,  s    ...Aqx...r   rS   join)r8   
setdefaultrI   r9   warningsBiorT   warnr   UnknownPosition_seqfeature_id
qualifiersr    "_retrieve_location_qualifier_valuelocation_operatorgetSimpleLocationlocationrW   rY   rX   CompoundLocation)r   r   sqlresultsseq_feature_listseqfeature_idseqfeature_typeseqfeature_rankqvsrf   qv_nameqv_valuevalue	locationslocation_idr   r'   rW   ra   rT   remote_resultslookuprL   rM   rN   rO   featurelocsrk   strandss                                 r   _retrieve_featuresr~      s   	  **3>>G;B ~) ~)7** 
 
 
!$ 	@ 	@GX!!'2..55h????** 
 
 "% 	? 	?GX++++E!!)R0077>>>>.. 
 
 	 07 	@ 	@+KV 
{{^++ j*0&&--A    S_u000000'*/m)DE$   }"244{ 022k5#v>???? 55( 	
 
 7E 		. 		.3KG 7c>> ,,7,, ||#)1+F;'_===!.'y>>Q^^q  .7l+KV )K) )G% %jjlCCOFG)8DDG&,G#&,G##*G  D%  2:/UC"(**[,"G"G-s6wv      /....G7||q  R7]] DDbDz):4HHG 	((((r   c                 d    |                      d|f          }	 |d         S # t          $ r Y dS w xY w)NzASELECT value FROM location_qualifier_value WHERE location_id = %sr   rU   )execute_and_fetch_col0r"   )r   rx   rv   s      r   rg   rg   :  sO    **K	 EQx   rrs   ! 
//c                 j   i }|                     t          | |                     |                     t          | |                     |                     t          | |                     |                     t	          | ||                     |                     t          | |                     |S N)update_retrieve_alphabet_retrieve_qualifier_value_retrieve_reference_retrieve_taxon_retrieve_comment)r   r   taxon_idannotationss       r   _retrieve_annotationsr   E  s    K)':>>???0*EEFFF*7J??@@@w
HEEFFF(*==>>>r   c                 f   |                      d|f          }t          |          dk    r t          dt          |           d          |d         }t          |          dk    r t          dt          |           d          |d         }|dk    rd}n|d	k    rd
}n|dk    rd}nd }|d|iS i S )Nz7SELECT alphabet FROM biosequence WHERE bioentry_id = %sr   r6   r7   r   z%Expected 1 alphabet in response, got dnaDNArnaRNAproteinmolecule_type)r8   r    r9   )r   r   rn   	alphabetsalphabetr   s         r   r   r   O  s    **AJ= G 7||qDS\\DDDEEE
I
9~~RYRRRSSS|H5	U			Y		! //	r   c                     |                      d|f          }i }|D ]H\  }}|dk    rd}n|dk    rd}n|dk    rd}|                    |g                               |           I|S )NzqSELECT name, value FROM bioentry_qualifier_value JOIN term USING (term_id) WHERE bioentry_id = %s ORDER BY "rank"keywordkeywordsdate_changeddatesecondary_accession
accessions)r8   r`   rI   )r   r   rs   rf   namerv   s         r   r   r   g  s    

&
&	 
 C J 6 6e9DD^##DD***DdB''..u5555r   c                 b   |                      d|f          }g }|D ]\  }}}}}}	}
t          j                    }||"||dz  }t          j        ||          g|_        |r||_        |r||_        ||_        |	dk    r|
|_        n|	dk    r|
|_	        |
                    |           |rd|iS i S )NzSELECT start_pos, end_pos,  location, title, authors, dbname, accession FROM bioentry_reference JOIN reference USING (reference_id) LEFT JOIN dbxref USING (dbxref_id) WHERE bioentry_id = %s ORDER BY "rank"r   PUBMEDMEDLINE
references)r8   r   	Referencerj   rk   authorstitlejournal	pubmed_id
medline_idrI   )r   r   refsr   r   r'   rk   r   r   rL   rM   	references               r   r   r   |  s    ''	 

 
D JCG % %?sHeWfi(**	3? 
",";E3"G"G!HI 	( 'I 	$#IO$	X"+Iy  #,I )$$$$ j))	r   c                    i }|                      d|f          }|r|d         |d<   |                      d|f          }|r|d         |d<   |                      d|f          }|r|d         r|d         dk    r|d         |d<   g }|r<|                     d	|f          \  }}	}
||
k    rn|                    d|           |
}|<|r||d
<   |S )NzVSELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'genbank common name'r   sourcezRSELECT name FROM taxon_name WHERE taxon_id = %s AND name_class = 'scientific name'organismz3SELECT ncbi_taxon_id FROM taxon WHERE taxon_id = %srG   
ncbi_taxidzSELECT taxon_name.name, taxon.node_rank, taxon.parent_taxon_id FROM taxon, taxon_name WHERE taxon.taxon_id=taxon_name.taxon_id AND taxon_name.name_class='scientific name' AND taxon.taxon_id = %staxonomy)r   execute_oneinsert)r   r   r   acommon_namesscientific_namesncbi_taxidsr   r   rankparent_taxon_ids              r   r   r     sE   
A11	2	 L
  &"1o(55	.	 
  ,(+*00={ K  ){1~ )+a.C*?*?%a., H
 #&-&9&9'
 K'
 '
#dO && 4   "!  #$  ! *Hr   c                 X    |                      d|f          }d |D             }|rd|iS i S )NzESELECT comment_text FROM comment WHERE bioentry_id=%s ORDER BY "rank"c                     g | ]
}|d          S )r   r[   )r\   comms     r   
<listcomp>z%_retrieve_comment.<locals>.<listcomp>  s    (((DQ(((r   comment)r8   )r   r   rs   commentss       r   r   r     sL    

&
&O	 C )(C(((H 8$$	r   c                   |   e Zd ZdZd Zd Zd Zd Z eeeed          Z	ede
e         fd            Zej        d	e
e         dd
fd            Zej        dd            Zd Zd Zd Z eeeed          Zedej        fd            Zej        d	eej                 dd
fd            Zej        dd            Zd
S )DBSeqRecordz4BioSQL equivalent of the Biopython SeqRecord object.c           	      4   || _         || _        | j                             d| j        f          \  | _        | _        | _        }}| _        | _        | _        |r|dk    r| d| | _	        n|| _	        t          ||          }t          |          | _        dS )a8  Create a DBSeqRecord object.

        Arguments:
         - adaptor - A BioSQL.BioSeqDatabase.Adaptor object
         - primary_id - An internal integer ID used by BioSQL

        You wouldn't normally create a DBSeqRecord object yourself,
        this is done for you when using a BioSeqDatabase object
        zSELECT biodatabase_id, taxon_id, name, accession, version, identifier, division, description FROM bioentry WHERE bioentry_id = %srG   r7   r?   N)_adaptor_primary_idr   _biodatabase_id	_taxon_idr   _identifier	_divisiondescriptionidr=   r   _per_letter_annotations)r   r   r   rM   rN   r   s         r   r   zDBSeqRecord.__init__  s      % M%%& 
 
		
 NIN  	 w#~~"..W..DGGDG
 #7J77'6f'E'E'E$$$r   c                 n    t          | d          st          | j        | j                  | _        | j        S )N_seq)hasattrrE   r   r   r   r   s    r   	__get_seqzDBSeqRecord.__get_seq  s3    tV$$ 	G%dmT5EFFDIyr   c                     || _         d S r   r   )r   rC   s     r   	__set_seqzDBSeqRecord.__set_seq  s    			r   c                 
    | ` d S r   r   r   s    r   	__del_seqzDBSeqRecord.__del_seq  s    IIIr   z
Seq objectreturnc                 n    t          | d          st          | j        | j                  | _        | j        S )zDatabase cross references.rJ   )r   rP   r   r   rJ   r   s    r   rK   zDBSeqRecord.dbxrefs   s5     tZ(( 	O-dmT=MNNDM}r   rv   Nc                     || _         d S r   rJ   r   rv   s     r   rK   zDBSeqRecord.dbxrefs'  s    r   c                 
    | ` d S r   r   r   s    r   rK   zDBSeqRecord.dbxrefs+  s    MMMr   c                 n    t          | d          st          | j        | j                  | _        | j        S )N	_features)r   r~   r   r   r   r   s    r   __get_featureszDBSeqRecord.__get_features/  s3    t[)) 	Q/t?OPPDN~r   c                     || _         d S r   r   )r   featuress     r   __set_featureszDBSeqRecord.__set_features4  s    !r   c                 
    | ` d S r   r   r   s    r   __del_featureszDBSeqRecord.__del_features7  s    NNNr   Featuresc                     t          | d          sQt          | j        | j        | j                  | _        | j        r| j        | j        d<   | j        r| j        | j        d<   | j        S )zAnnotations._annotationsgidata_file_division)r   r   r   r   r   r   r   r   r   s    r   r   zDBSeqRecord.annotations<  sx     t^,, 	I 5t/! !D  ;*.*:!$'~ I:>.!"67  r   c                 *    |r	|| _         d S i | _         d S r   r   r   s     r   r   zDBSeqRecord.annotationsI  s&     	# %D "Dr   c                 
    | ` d S r   r   r   s    r   r   zDBSeqRecord.annotationsP  s    r   )r   N)r/   r0   r1   r2   r   _DBSeqRecord__get_seq_DBSeqRecord__set_seq_DBSeqRecord__del_seqpropertyrC   liststrrK   setterdeleter_DBSeqRecord__get_features_DBSeqRecord__set_features_DBSeqRecord__del_featuresr   r   _AnnotationsDictr   r   r[   r   r   r   r     s       >>&F &F &FP  
     (9iL
A
ACc    X ^T#Y 4    ^ _   _  
" " "   x
SSH
!Y7 
! 
! 
! X
! #)*D!E #$ # # # #      r   r   N)r2   typingr   rb   r   Bio.Seqr   r   Bio.SeqRecordr   r   r
   r=   rE   rP   r~   rg   r   r   r   r   r   r   r   r[   r   r   <module>r      s                      1 1 1 1 1 1 ) ) ) ) ) ) # # # # # #=0 =0 =0 =0 =07 =0 =0 =0@
 
 
.( .( .(b  &H H HV      0  *$ $ $N4 4 4n
 
 
k k k k k) k k k k kr   