
    &hz                         d Z ddlZddlmZ ddlmZ ddlmZ daddZ G d	 d
      Z G d d      Z	 G d d      Z
 G d de
      ZeedZdddddddZ G d d      Zy)zConnect with a BioSQL database and load Biopython like objects from it.

This provides interfaces for loading biological objects from a relational
database, and is compatible with the BioSQL standards.
    N   )BioSeq)DBUtils)LoaderFc                 .   | dk(  rt        d      t        j                  dk(  r,ddlm} |}| dv rd}d|d	   z   d
z   }n | dv rd}d|d	   z   d
z   }nt        | dg      }|j                  }|j                         }| dv r2t        j                  dk7  rd|v r|d   |d<   |d= d|v r*|d   |d<   |d= nd|v r|d   |d<   |d= d|v r|d   |d<   |d= | dv r|j                  d      sd|d<   t        j                  dk(  rU| dv r% ||j                  dd      z   |d   |d         }nD| dv r@ ||j                  dd      z   dz   |d   |d         }n| dv r ||d         }n |d$i |}t        j                  dk(  rt        ||       }	nt        |      }	| dv r|	j                  j                  d       | dv r=d}
|	j                  j                  |
      rddl}ddlm} |j!                  d |       d!a|	S | d"k(  r|	j                  j                  d#       |	S )%a  Load an existing BioSQL-style database.

    This function is the easiest way to retrieve a connection to a
    database, doing something like::

        from BioSQL import BioSeqDatabase
        server = BioSeqDatabase.open_database(user="root", db="minidb")

    Arguments:
     - driver - The name of the database driver to use for connecting. The
       driver should implement the python DB API. By default, the MySQLdb
       driver is used.
     - user -the username to connect to the database with.
     - password, passwd - the password to connect with
     - host - the hostname of the database
     - database or db - the name of the database

    psycopgzUUsing BioSQL with psycopg (version one) is no longer supported. Use psycopg2 instead.javar   )zxJDBCMySQLdbzcom.mysql.jdbc.Driverzjdbc:mysql://host/)psycopg2zorg.postgresql.Driverzjdbc:postgresql://connect)fromlist)r   mysql.connectordatabasedbpasswordpasswdr   pgdb	template1mysqluser
postgresqlz?stringtype=unspecified)sqlite3zSET sql_mode='ANSI_QUOTES';zaSELECT ev_class FROM pg_rewrite WHERE rulename='rule_bioentry_i1' OR rulename='rule_bioentry_i2';N)BiopythonWarninga  Your BioSQL PostgreSQL schema includes some rules currently required for bioperl-db but which maycause problems loading data using Biopython (see BioSQL's RedMine Bug 2839 aka GitHub Issue 4 https://github.com/biosql/biosql/issues/4). If you do not use BioPerl, please remove these rules. Biopython should cope with the rules present, but with a performance penalty when loading new records.Tr   zPRAGMA foreign_keys = ON )
ValueErrorosnamecom.ziclix.python.sqlr
   
__import__r   copygetDBServeradaptorexecuteexecute_and_fetchallwarningsBior   warn_POSTGRES_RULES_PRESENT)driverkwargsr
   modulejdbc_driverurl_prefr   kwconnserversqlr+   r   s                d/mounts/lovelace/software/anaconda3/envs/py312/lib/python3.12/site-packages/BioSQL/BioSeqDatabase.pyopen_databaser9      s   & 6
 	

 
ww&0[ 1K&7#=H|#1K+fVn<sBH Fi[9nnG 
B//BGGv4E*~BtH:j>BxL:
 2:XBzN4r>\BzN8%%bffZ.@$:	ww&[ 266*g666
:	D |#266*l;;>WW6
:	D 
;	r*~&}}	ww&$/$' //<= %%+ 	
 >>..s3,MM' ! '+# M 
9	 	9:M    c                   n    e Zd ZdZddZd Zd Zd Zd Zd Z	d	 Z
d
 Zd Zd ZddZd Zd Zd Zd Zy)r'   zRepresents a BioSQL database containing namespaces (sub-databases).

    This acts like a Python dictionary, giving access to each namespace
    (defined by a row in the biodatabase table) as a BioSeqDatabase object.
    Nc                     || _         ||j                  }|dk(  rd}nd}t        j                  |t              } ||t        j                  |      |      | _        || _        y)a  Create a DBServer object.

        Arguments:
         - conn - A database connection object
         - module - The module used to create the database connection
         - module_name - Optionally, the name of the module. Default: module.__name__

        Normally you would not want to create a DBServer object yourself.
        Instead use the open_database function, which returns an instance of DBServer.
        Nr   TF)wrap_cursor)	r1   __name___interface_specific_adaptorsr&   Adaptorr   get_dbutilsr(   module_name)selfr5   r1   rB   r=   Adapts         r8   __init__zDBServer.__init__   sg      //K++KK,00gF'%%k2
 'r:   c                 b    | j                   j                   d| j                  j                  dS )zEReturn a short description of the class name and database connection.())	__class__r>   r(   r5   rC   s    r8   __repr__zDBServer.__repr__   s+    ..))*!DLL,=,=+@BBr:   c                 .    t        | j                  |      S )zqReturn a BioSeqDatabase object.

        Arguments:
            - name - The name of the BioSeqDatabase

        )BioSeqDatabaser(   )rC   r"   s     r8   __getitem__zDBServer.__getitem__   s     dllD11r:   c                 T    d}t        | j                  j                  |      d         S )z=Return number of namespaces (sub-databases) in this database.z$SELECT COUNT(name) FROM biodatabase;r   )intr(   execute_and_fetch_col0rC   r7   s     r8   __len__zDBServer.__len__   s&    44<<66s;A>??r:   c                 X    d}t        | j                  j                  ||f      d         S )z5Check if a namespace (sub-database) in this database.z2SELECT COUNT(name) FROM biodatabase WHERE name=%s;r   )boolr(   rQ   )rC   valuer7   s      r8   __contains__zDBServer.__contains__   s*    BDLL77eXFqIJJr:   c                 H    t        | j                  j                               S z8Iterate over namespaces (sub-databases) in the database.)iterr(   list_biodatabase_namesrJ   s    r8   __iter__zDBServer.__iter__   s     DLL779::r:   c                     t        |       S rY   rZ   rJ   s    r8   keyszDBServer.keys       Dzr:   c              #   (   K   | D ]	  }| |     yw)z4Iterate over BioSeqDatabase objects in the database.Nr   rC   keys     r8   valueszDBServer.values         	Cs)O	   c              #   ,   K   | D ]  }|| |   f  yw)z9Iterate over (namespace, BioSeqDatabase) in the database.Nr   rb   s     r8   itemszDBServer.items   #      	!CtCy. 	!   c                     || vrt        |      | j                  j                  |      }t        j                  | j                  |      }|j                          y)z'Remove a namespace and all its entries.N)KeyErrorr(   fetch_dbid_by_dbnamer   DatabaseRemoverremove)rC   r"   db_idremovers       r8   __delitem__zDBServer.__delitem__   sH    t4. 11$7((u=r:   c                 p    d}| j                   j                  ||||f       t        | j                   |      S )z/Add a new database to the server and return it.zJINSERT INTO biodatabase (name, authority, description) VALUES (%s, %s, %s))r(   r)   rM   )rC   db_name	authoritydescriptionr7   s        r8   new_databasezDBServer.new_database   s:    # 	 	S7I{"CDdllG44r:   c                 $   d}t        |      5 }|D ]M  }|j                  d      r|j                  d      r'|j                         s8||j                         dz   z  }O 	 ddd       | j                  dv r&| j                  j
                  j                  |       y| j                  dv rA|j                  d      }|dd	 D ]'  }| j                  j
                  j                  |       ) yt        d
| j                   d      # 1 sw Y   xY w)zLoad a database schema into the given database.

        This is used to create tables, etc when a database is first created.
        sql_file should specify the complete path to a file containing
        SQL entries for building the tables.
         z--# Nr   )r   r   r   ;zModule z not supported by the loader.)	open
startswithstriprB   r(   cursorr)   splitr    )rC   sql_filer7   
sql_handleline	sql_partssql_lines          r8   load_database_sqlzDBServer.load_database_sql   s    (^ 	.z" .??4(__S)ZZ\4::<#--C.	. 33LL'', !JJ		#I%crN 6##++H56 wt'7'7&88UVWW/	. 	.s   :D	DDc                 6    | j                   j                         S )z/Commit the current transaction to the database.)r(   commitrJ   s    r8   r   zDBServer.commit  s    ||""$$r:   c                 6    | j                   j                         S z"Roll-back the current transaction.)r(   rollbackrJ   s    r8   r   zDBServer.rollback!  s    ||$$&&r:   c                 6    | j                   j                         S z3Close the connection. No further activity possible.)r(   closerJ   s    r8   r   zDBServer.close%  s    ||!!##r:   N)NN)r>   
__module____qualname____doc__rE   rK   rN   rS   rW   r\   r_   rd   rh   rr   rw   r   r   r   r   r   r:   r8   r'   r'      sW    '2C2@
K
;

!
5$XL%'$r:   r'   c                   <    e Zd ZdZd Zd
dZd Zd Zd Zd Z	d	 Z
y)_CursorWrapperzCA wrapper for mysql.connector resolving bytestring representations.c                     || _         y r   )real_cursor)rC   r   s     r8   rE   z_CursorWrapper.__init__-  s
    &r:   Nc                 >    | j                   j                  |||       y)zExecute a sql statement.N)r   r)   )rC   	operationparamsmultis       r8   r)   z_CursorWrapper.execute0  s      FE:r:   c                 <    | j                   j                  ||       y)zExecute many sql statements.N)r   executemany)rC   r   r   s      r8   r   z_CursorWrapper.executemany4  s    $$Y7r:   c                     t        |      }t        |      D ]*  \  }}t        |t              s|j	                  d      ||<   , t        |      S )z4Decode any bytestrings present in the row (PRIVATE).zutf-8)list	enumerate
isinstancebytesdecodetuple)rC   tuple_
tuple_listielems        r8   _convert_tuplez_CursorWrapper._convert_tuple8  sL    &\
 , 	5GAt$& $G 4
1	5 Z  r:   c                 \    g }|D ]$  }| j                  |      }|j                  |       & |S r   )r   append)rC   lstret_lstr   	new_tuples        r8   _convert_listz_CursorWrapper._convert_list@  s9     	&F++F3INN9%	& r:   c                 X    | j                   j                         }| j                  |      S r   )r   fetchallr   )rC   rvs     r8   r   z_CursorWrapper.fetchallG  s'    &&(!!"%%r:   c                 X    | j                   j                         }| j                  |      S r   )r   fetchoner   )rC   r   s     r8   r   z_CursorWrapper.fetchoneK  s'    !!**,""6**r:   )NF)r>   r   r   r   rE   r)   r   r   r   r   r   r   r:   r8   r   r   *  s(    M';8!&+r:   r   c                       e Zd ZdZddZd ZddZd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd ZddZddZd Zd ZddZddZy)r@   zHigh level wrapper for a database connection and cursor.

    Most database calls in BioSQL are done indirectly though this adaptor
    class. This provides helper methods for fetching data and executing
    sql.
    c                     || _         |r&t        |j                               | _        || _        y|j                         | _        || _        y)zCreate an Adaptor object.

        Arguments:
         - conn - A database connection
         - dbutils - A BioSQL.DBUtils object
         - wrap_cursor - Optional, whether to wrap the cursor object

        N)r5   r   r   dbutils)rC   r5   r   r=   s       r8   rE   zAdaptor.__init__X  s?     	(7DK  ++-DKr:   c                 N    | j                   j                  | j                  |      S )z.Return the last row id for the selected table.)r   last_idr   )rC   tables     r8   r   zAdaptor.last_idh  s    ||##DKK77r:   c                 N    | j                   j                  | j                  |      S )zASet the autocommit mode. True values enable; False value disable.)r   
autocommitr5   )rC   ys     r8   r   zAdaptor.autocommitl  s    ||&&tyy!44r:   c                 6    | j                   j                         S )zCommit the current transaction.)r5   r   rJ   s    r8   r   zAdaptor.commitp  s    yy!!r:   c                 6    | j                   j                         S r   )r5   r   rJ   s    r8   r   zAdaptor.rollbackt  s    yy!!##r:   c                 6    | j                   j                         S r   )r5   r   rJ   s    r8   r   zAdaptor.closex  s    yy  r:   c                     | j                  d|f       | j                  j                         }|st        d|      |d   d   S )z;Return the internal id for the sub-database using its name.z6select biodatabase_id from biodatabase where name = %sz"Cannot find biodatabase with name r   )r)   r   r   rl   )rC   dbnamer   s      r8   rm   zAdaptor.fetch_dbid_by_dbname|  sL    Dvi	
 [[!!#?zJKK!uQxr:   c                     d}|g}|r|dz  }|j                  |       | j                  ||       | j                  j                         }|st	        d|      t        |      dkD  rt	        d|      |d   d   S )a  Return the internal id for a sequence using its display id.

        Arguments:
         - dbid - the internal id for the sub-database
         - name - the name of the sequence. Corresponds to the
           name column of the bioentry table of the SQL schema

        z0select bioentry_id from bioentry where name = %s and biodatabase_id = %sCannot find display id r   z$More than one entry with display id r   r   r)   r   r   
IndexErrorlenrC   dbidr"   r7   fieldsr   s         r8   fetch_seqid_by_display_idz!Adaptor.fetch_seqid_by_display_id  s     A--CMM$S&![[!!#6th?@@r7Q;CD8LMM!uQxr:   c                     d}|g}|r|dz  }|j                  |       | j                  ||       | j                  j                         }|st	        d|      t        |      dkD  rt	        d|      |d   d   S )a  Return the internal id for a sequence using its accession.

        Arguments:
         - dbid - the internal id for the sub-database
         - name - the accession of the sequence. Corresponds to the
           accession column of the bioentry table of the SQL schema

        5select bioentry_id from bioentry where accession = %sr   zCannot find accession r   z#More than one entry with accession r   r   r   s         r8   fetch_seqid_by_accessionz Adaptor.fetch_seqid_by_accession  s     F--CMM$S&![[!!#5dX>??r7Q;B4(KLL!uQxr:   c                 `    d}|g}|r|dz  }|j                  |       | j                  ||      S )a  Return a list internal ids using an accession.

        Arguments:
         - dbid - the internal id for the sub-database
         - name - the accession of the sequence. Corresponds to the
           accession column of the bioentry table of the SQL schema

        r   r   )r   rQ   )rC   r   r"   r7   r   s        r8   fetch_seqids_by_accessionz!Adaptor.fetch_seqids_by_accession  s>     F--CMM$**377r:   c                    |j                  d      }t        |      dkD  rt        d|      |d   }t        |      dk(  r|d   }nd}d}||g}|r|dz  }|j                  |       | j	                  ||       | j
                  j                         }|st        d	|      t        |      dkD  rt        d
|      |d   d   S )a  Return the internal id for a sequence using its accession and version.

        Arguments:
         - dbid - the internal id for the sub-database
         - name - the accession of the sequence containing a version number.
           Must correspond to <accession>.<version>

        .   zBad version r   r   0zFSELECT bioentry_id FROM bioentry WHERE accession = %s AND version = %sr   zCannot find version z!More than one entry with version )r   r   r   r   r)   r   r   )	rC   r   r"   acc_versionaccversionr7   r   r   s	            r8   fetch_seqid_by_versionzAdaptor.fetch_seqid_by_version  s     jjo{a|D8455!n{q !!nGGVw--CMM$S&![[!!#3D8<==r7Q;@IJJ!uQxr:   c                     d}|g}|r|dz  }|j                  |       | j                  ||       | j                  j                         }|st	        d|      |d   d   S )a!  Return the internal id for a sequence using its identifier.

        Arguments:
         - dbid - the internal id for the sub-database
         - identifier - the identifier of the sequence. Corresponds to
           the identifier column of the bioentry table in the SQL schema.

        z6SELECT bioentry_id FROM bioentry WHERE identifier = %sr   r   r   )r   r)   r   r   r   )rC   r   
identifierr7   r   r   s         r8   fetch_seqid_by_identifierz!Adaptor.fetch_seqid_by_identifier  sn     G--CMM$S&![[!!#6znEFF!uQxr:   c                 $    | j                  d      S )z*Return a list of all of the sub-databases.zSELECT name FROM biodatabaserQ   rJ   s    r8   r[   zAdaptor.list_biodatabase_names  s    **+IJJr:   c                 (    | j                  d|f      S )zReturn a list of internal ids for all of the sequences in a sub-databae.

        Arguments:
         - dbid - The internal id for a sub-database

        z:SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %sr   rC   r   s     r8   list_bioentry_idszAdaptor.list_bioentry_ids  s     **H4'
 	
r:   c                 (    | j                  d|f      S )zReturn a list of all sequence names in a sub-databae.

        Arguments:
         - dbid - The internal id for a sub-database

        z3SELECT name FROM bioentry WHERE biodatabase_id = %sr   r   s     r8   list_bioentry_display_idsz!Adaptor.list_bioentry_display_ids  s     **AD7
 	
r:   c                 &    | j                  ||      S )zReturn ids given a SQL statement to select for them.

        This assumes that the given SQL does a SELECT statement that
        returns a list of items. This parses them out of the 2D list
        they come as and just returns them in a list.
        r   rC   r7   argss      r8   list_any_idszAdaptor.list_any_ids  s     **355r:   Nc                     | j                  ||xs d       | j                  j                         }t        |      dk7  rt	        dt        |       d      |d   S )9Execute sql that returns 1 record, and return the record.r   r   zExpected 1 response, got r   r   )r)   r   r   r   r    )rC   r7   r   r   s       r8   execute_onezAdaptor.execute_one  sS    S$*"%[[!!#r7a<8R	CDD!ur:   c                     t         j                  dk(  r|j                  dd      }| j                  j	                  | j
                  ||       y)zJust execute an sql command.r	   %s?N)r!   r"   replacer   r)   r   r   s      r8   r)   zAdaptor.execute   s8    77f++dC(CT[[#t4r:   c                     t         j                  dk(  r|j                  dd      }| j                  j	                  | j
                  ||       y)zExecute many sql commands.r	   r   r   N)r!   r"   r   r   r   r   r   s      r8   r   zAdaptor.executemany&  s8    77f++dC(C  c48r:   c                 B    ||z
  }| j                  d|dz   ||f      d   S )zReturn a substring of a sequence.

        Arguments:
         - seqid - The internal id for the sequence
         - start - The start position of the sequence; 0-indexed
         - end - The end position of the sequence

        zBSELECT SUBSTR(seq, %s, %s) FROM biosequence WHERE bioentry_id = %sr   r   )r   )rC   seqidstartendlengths        r8   get_subseq_as_stringzAdaptor.get_subseq_as_string,  s=     u PQY&
  	r:   c                     | j                  ||xs d       | j                  j                         D cg c]  }|d   	 c}S c c}w )9Return a list of values from the first column in the row.r   r   r)   r   r   )rC   r7   r   fields       r8   rQ   zAdaptor.execute_and_fetch_col0A  s9    S$*"%&*kk&:&:&<=Ua===s   Ac                 b    | j                  ||xs d       | j                  j                         S )$Return a list of tuples of all rows.r   r   r   s      r8   r*   zAdaptor.execute_and_fetchallF  s'    S$*"%{{##%%r:   F)Tr   )r>   r   r   r   rE   r   r   r   r   r   rm   r   r   r   r   r   r[   r   r   r   r   r)   r   r   rQ   r*   r   r:   r8   r@   r@   P  sv     85"$!,,8 <*K	
	
659*>
&r:   r@   c                   L     e Zd ZdZed        Zd fd	Zd fd	Zd fd	Z xZ	S )MysqlConnectorAdaptora  A BioSQL Adaptor class with fixes for the MySQL interface.

    BioSQL was failing due to returns of bytearray objects from
    the mysql-connector-python database connector. This adaptor
    class scrubs returns of bytearrays and of byte strings converting
    them to string objects instead. This adaptor class was made in
    response to backwards incompatible changes added to
    mysql-connector-python in release 2.0.0 of the package.
    c                 R    t        | t        t        f      r| j                         S | S )z:If s is bytes or bytearray, convert to a string (PRIVATE).)r   r   	bytearrayr   )ss    r8   _bytearray_to_strz'MysqlConnectorAdaptor._bytearray_to_strW  s$     a%+,88:r:   c                 N     t            ||      }t         fd|D              S )r   c              3   @   K   | ]  }j                  |        y wr   r  .0vrC   s     r8   	<genexpr>z4MysqlConnectorAdaptor.execute_one.<locals>.<genexpr>a  s     <1T++A.<   )superr   r   )rC   r7   r   outrI   s   `   r8   r   z!MysqlConnectorAdaptor.execute_one^  s%    g!#t,<<<<r:   c                 l    t         |   ||      }|D cg c]  }| j                  |       c}S c c}w )r   )r  rQ   r  )rC   r7   r   r  columnrI   s        r8   rQ   z,MysqlConnectorAdaptor.execute_and_fetch_col0c  s3    g,S$7=@A6&&v.AAAs   1c                 t     t            ||      }|D cg c]  }t         fd|D               c}S c c}w )r   c              3   @   K   | ]  }j                  |        y wr   r  r  s     r8   r
  z=MysqlConnectorAdaptor.execute_and_fetchall.<locals>.<genexpr>k  s     ;Ad,,Q/;r  )r  r*   r   )rC   r7   r   r  orI   s   `    r8   r*   z*MysqlConnectorAdaptor.execute_and_fetchallh  s3    g*35EHI;;;IIIs   5r   )
r>   r   r   r   staticmethodr  r   rQ   r*   __classcell__)rI   s   @r8   r   r   L  s3      =
B
J Jr:   r   )r   r   r   r   r   r   )
primary_idgi
display_idr"   	accessionr   c                   r    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zd Zd Zd Zd Zd ZddZy)rM   zRepresents a namespace (sub-database) within the BioSQL database.

    i.e. One row in the biodatabase table, and all all rows in the bioentry
    table associated with it.
    c                 `    || _         || _        | j                   j                  |      | _        y)zCreate a BioDatabase object.

        Arguments:
         - adaptor - A BioSQL.Adaptor object
         - name - The name of the sub-database (namespace)

        N)r(   r"   rm   r   )rC   r(   r"   s      r8   rE   zBioSeqDatabase.__init__  s(     	LL55d;	r:   c                 <    d| j                   d| j                  dS )z-Return a short summary of the BioSeqDatabase.zBioSeqDatabase(z, rH   )r(   r"   rJ   s    r8   rK   zBioSeqDatabase.__repr__  s       0499-qAAr:   c                     | j                   j                  | j                  |      }t        j                  | j                   |      S )zGet a DBSeqRecord object by its name.

        Example: seq_rec = db.get_Seq_by_id('ROA1_HUMAN')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a Seq object, and presumably was to mirror BioPerl.
        )r(   r   r   r   DBSeqRecordrC   r"   r   s      r8   get_Seq_by_idzBioSeqDatabase.get_Seq_by_id  s5     66tyy$G!!$,,66r:   c                     | j                   j                  | j                  |      }t        j                  | j                   |      S )a  Get a DBSeqRecord object by accession number.

        Example: seq_rec = db.get_Seq_by_acc('X77802')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a Seq object, and presumably was to mirror BioPerl.
        )r(   r   r   r   r  r  s      r8   get_Seq_by_acczBioSeqDatabase.get_Seq_by_acc  s5     55diiF!!$,,66r:   c                     | j                   j                  | j                  |      }t        j                  | j                   |      S )a  Get a DBSeqRecord object by version number.

        Example: seq_rec = db.get_Seq_by_ver('X77802.1')

        The name of this method is misleading since it returns a DBSeqRecord
        rather than a Seq object, and presumably was to mirror BioPerl.
        )r(   r   r   r   r  r  s      r8   get_Seq_by_verzBioSeqDatabase.get_Seq_by_ver  s5     33DIItD!!$,,66r:   c                     | j                   j                  | j                  |      }|D cg c]"  }t        j                  | j                   |      $ c}S c c}w )a/  Get a list of DBSeqRecord objects by accession number.

        Example: seq_recs = db.get_Seq_by_acc('X77802')

        The name of this method is misleading since it returns a list of
        DBSeqRecord objects rather than a list of Seq objects, and presumably
        was to mirror BioPerl.
        )r(   r   r   r   r  )rC   r"   seqidsr   s       r8   get_Seqs_by_acczBioSeqDatabase.get_Seqs_by_acc  sC     77		4HEKLE""4<<7LLLs   'Ac                     t        j                  | j                  |      }|j                  | j                  k7  rt        d|d      |S )zReturn a DBSeqRecord for one of the sequences in the sub-database.

        Arguments:
         - key - The internal id for the sequence

        Entry z* does exist, but not in current name space)r   r  r(   _biodatabase_idr   rl   )rC   rc   records      r8   rN   zBioSeqDatabase.__getitem__  sF     ##DLL#6!!TYY.VC7*TUVVr:   c                 ~    || vrt        d|d      d}| j                  j                  || j                  |f       y)z'Remove an entry and all its annotation.r(  z2 cannot be deleted. It was not found or is invalidz@DELETE FROM bioentry WHERE biodatabase_id=%s AND bioentry_id=%s;N)rl   r(   r)   r   )rC   rc   r7   s      r8   rr   zBioSeqDatabase.__delitem__  sH    d?QR  QS499c"23r:   c                 l    d}t        | j                  j                  || j                  f      d         S )z:Return number of records in this namespace (sub database).z@SELECT COUNT(bioentry_id) FROM bioentry WHERE biodatabase_id=%s;r   )rP   r(   rQ   r   rR   s     r8   rS   zBioSeqDatabase.__len__  s.    P4<<66sTYYLI!LMMr:   c                     d}	 t        |      }t        | j                  j	                  || j
                  |f      d         S # t        $ r Y yw xY w)zBCheck if a primary (internal) id is this namespace (sub database).zSSELECT COUNT(bioentry_id) FROM bioentry WHERE biodatabase_id=%s AND bioentry_id=%s;Fr   )rP   r    rU   r(   rQ   r   )rC   rV   r7   bioentry_ids       r8   rW   zBioSeqDatabase.__contains__  s_    : 		e*K LL//dii5MNqQ
 	
  		s   A 	AAc                 ^    t        | j                  j                  | j                              S zEIterate over ids (which may not be meaningful outside this database).)rZ   r(   r   r   rJ   s    r8   r\   zBioSeqDatabase.__iter__  s"     DLL22499=>>r:   c                     t        |       S r0  r^   rJ   s    r8   r_   zBioSeqDatabase.keys  r`   r:   c              #   (   K   | D ]	  }| |     yw)zAIterate over DBSeqRecord objects in the namespace (sub database).Nr   rb   s     r8   rd   zBioSeqDatabase.values  re   rf   c              #   ,   K   | D ]  }|| |   f  yw)z@Iterate over (id, DBSeqRecord) for the namespace (sub database).Nr   rb   s     r8   rh   zBioSeqDatabase.items  ri   rj   c                 ~   t        |      dk7  rt        d      t        |j                               d   \  }}|t        vr,t        dt        t        j                               d|      t        |   }t        | j                  |      } || j                  |      }t        j                  | j                  |      S )zReturn a DBSeqRecord using an acceptable identifier.

        Arguments:
         - kwargs - A single key-value pair where the key is one
           of primary_id, gi, display_id, name, accession, version

        r   z#single key/value parameter expectedr   zlookup() expects one of z, not )r   	TypeErrorr   rh   _allowed_lookupsr_   getattrr(   r   r   r  )rC   r0   kr	  lookup_namelookup_funcr   s          r8   lookupzBioSeqDatabase.lookup  s     v;!ABBFLLN#A&1$$*40@0E0E0G+H*K6RSQVW  'q)dllK8DIIq)!!$,,66r:   c           	         t        j                  | j                  | j                  |      }d}|D ]  }|dz  }t        r|j
                  j                  d      dk(  r+|j
                  j                  d      \  }}	 t        |      }n|j
                  }d}|j                  j                  d      }d}	| j                  j                  |	|| j                  ||| j                  fz         | j                  j                  j                         r%| j                  j                  j!                  d      |j#                  |        |S # t        $ r |j
                  }d}Y w xY w)a  Load a set of SeqRecords into the BioSQL database.

        record_iterator is either a list of SeqRecord objects, or an
        Iterator object that returns SeqRecord objects (such as the
        output from the Bio.SeqIO.parse() function), which will be
        used to populate the database.

        fetch_NCBI_taxonomy is boolean flag allowing or preventing
        connection to the taxonomic database on the NCBI server
        (via Bio.Entrez) to fetch a detailed taxonomy for each
        SeqRecord.

        Example::

            from Bio import SeqIO
            count = db.load(SeqIO.parse(open(filename), format))

        Returns the number of records loaded.
        r   r   r   r  zSELECT bioentry_id FROM bioentry WHERE (identifier = '%s' AND biodatabase_id = '%s') OR (accession = '%s' AND version = '%s' AND biodatabase_id = '%s')z7Duplicate record detected: record has not been inserted)r   DatabaseLoaderr(   r   r.   idcountr   rP   r    annotationsr&   r)   r   r   r5   IntegrityErrorload_seqrecord)
rC   record_iteratorfetch_NCBI_taxonomy	db_loadernum_records
cur_recordr  r   r  r7   s
             r8   loadzBioSeqDatabase.load  sQ   ( ))$,,		CVW	) 	1J1K '==&&s+q0)3)<)<S)A&Iw$"%g,
 !+IG++//5Y 
 $$2tyy)WdiiHH <<&&//1,,++::Q  $$Z0=	1> + & $$.MM	"#$s   <E		E#"E#Nr   )r>   r   r   r   rE   rK   r  r!  r#  r&  rN   rr   rS   rW   r\   r_   rd   rh   r;  rH  r   r:   r8   rM   rM     sZ    
<B	7	7	7
M
4N

 ?

!
7(6r:   rM   r   )r   r!   ry   r   r   r   r.   r9   r'   r   r@   r   r?   r6  rM   r   r:   r8   <module>rI     s    
    |~L$ L$^#+ #+Ly& y&xJG JH -$   .
%-'+' L Lr:   