
    &h~`                         d Z ddlZddlZddlZddlZddlmZ ddlmZ 	 ddl	Z	ej                  dd       Zd Z G d de      Z G d	 d
ej                  j                        Z G d de      Zy# e
$ r dZ	Y Uw xY w)zCode for more fancy file handles.

Bio.File defines private classes used in Bio.SeqIO and Bio.SearchIO for
indexing files. These are not intended for direct use.
    N)ABC)abstractmethodc              +      K   	 t        | |fi |5 }| ddd       y# 1 sw Y   yxY w# t        $ r |  Y yw xY ww)a?  Context manager to ensure we are using a handle.

    Context manager for arguments that can be passed to SeqIO and AlignIO read, write,
    and parse methods: either file objects or path-like objects (strings, pathlib.Path
    instances, or more generally, anything that can be handled by the builtin 'open'
    function).

    When given a path-like object, returns an open file handle to that path, with provided
    mode, which will be closed when the manager exits.

    All other inputs are returned, and are *not* closed.

    Arguments:
     - handleish  - Either a file handle or path-like object (anything which can be
                    passed to the builtin 'open' function, such as str, bytes,
                    pathlib.Path, and os.DirEntry objects)
     - mode       - Mode to open handleish (used only if handleish is a string)
     - kwargs     - Further arguments to pass to open(...)

    Examples
    --------
    >>> from Bio import File
    >>> import os
    >>> with File.as_handle('seqs.fasta', 'w') as fp:
    ...     fp.write('>test\nACGT')
    ...
    10
    >>> fp.closed
    True

    >>> handle = open('seqs.fasta', 'w')
    >>> with File.as_handle(handle) as fp:
    ...     fp.write('>test\nACGT')
    ...
    10
    >>> fp.closed
    False
    >>> fp.close()
    >>> os.remove("seqs.fasta")  # tidy up

    N)open	TypeError)	handleishmodekwargsfps       W/mounts/lovelace/software/anaconda3/envs/py312/lib/python3.12/site-packages/Bio/File.py	as_handler      sI     V)T,V, 	H	 	 	 s4   >+ + >(+ >+ ;>;>c                    t        | d      }|j                  d      }|j                  d       |dk(  rddlm} 	 |j                  d|      S |S # t        $ r0}dt        |      v sJ |j                          t        d	      d
d
}~ww xY w)zOpen a file in binary mode, spot if it is BGZF format etc (PRIVATE).

    This functionality is used by the Bio.SeqIO and Bio.SearchIO index
    and index_db functions.

    If the file is gzipped but not BGZF, a specific ValueError is raised.
    rb   r   s      )bgzf)r	   fileobjBGZFz[Gzipped files are not suitable for indexing, please use BGZF (blocked gzip format) instead.N)	r   readseek r   
BgzfReader
ValueErrorstrclose)filenamehandlemagicr   es        r   _open_for_random_accessr    N   s     (D!FKKNE
KKN
	??f?== M  	SV##LLNA 		s   A 	B	+BB	c                   6    e Zd ZdZed        Zed        Zd Zy)_IndexedSeqFileProxya<  Abstract base class for file format specific random access (PRIVATE).

    This is subclasses in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Subclasses for each file format should define '__iter__', 'get'
    and optionally 'get_raw' methods.
    c                     t         )zReturn (identifier, offset, length in bytes) tuples.

        The length can be zero where it is not implemented or not
        possible for a particular file format.
        NotImplementedErrorselfs    r   __iter__z_IndexedSeqFileProxy.__iter__{   s
     "!    c                     t         )z$Return parsed object for this entry.r$   r'   offsets     r   getz_IndexedSeqFileProxy.get   s
    
 "!r)   c                     t        d      )zReturn the raw record from the file as a bytes string (if implemented).

        If the key is not found, a KeyError exception is raised.

        This may not have been implemented for all file formats.
        z#Not available for this file format.r$   r+   s     r   get_rawz_IndexedSeqFileProxy.get_raw   s     ""GHHr)   N)__name__
__module____qualname____doc__r   r(   r-   r/    r)   r   r"   r"   q   s5     " " " "Ir)   r"   c                   @    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zy
)_IndexedSeqFileDicta5  Read only dictionary interface to a sequential record file.

    This code is used in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Keeps the keys and associated file offsets in memory, reads the file
    to access entries as objects parsing them on demand. This approach
    is memory limited, but will work even with millions of records.

    Note duplicate keys are not allowed. If this happens, a ValueError
    exception is raised.

    As used in Bio.SeqIO, by default the SeqRecord's id string is used
    as the dictionary key. In Bio.SearchIO, the query's id string is
    used. This can be changed by supplying an optional key_function,
    a callback function which will be given the record id and must
    return the desired key. For example, this allows you to parse
    NCBI style FASTA identifiers, and extract the GI number to use
    as the dictionary key.

    Note that this dictionary is essentially read only. You cannot
    add or change values, pop values, nor clear the dictionary.
    c                    || _         | _        || _        || _        d| _        rfd|D        }n|}i }|D ]B  \  }}}	||v r3| j                   j
                  j                          t        d| d      |||<   D || _        y)Initialize the class.)NNc              3   <   K   | ]  \  }}} |      ||f  y wNr4   ).0keyr,   lengthkey_functions       r   	<genexpr>z/_IndexedSeqFileDict.__init__.<locals>.<genexpr>   s+      )S&& c"FF3s   zDuplicate key ''N)	_proxy_key_function_repr	_obj_repr_cached_prev_record_handler   r   _offsets)
r'   random_access_proxyr>   reprobj_reproffset_iteroffsetsr<   r,   r=   s
     `       r   __init__z_IndexedSeqFileDict.__init__   s     *)
!#/ -@K
 .K#. 	&C g~##))+ ?3%q!9::%	&  r)   c                     | j                   S )z2Return a string representation of the File object.rC   r&   s    r   __repr__z_IndexedSeqFileDict.__repr__   s    zzr)   c                 b    | r-dt        | j                               d   d| j                   dS y)z2Create a string representation of the File object.{r   z : z(...), ...}z{})listkeysrD   r&   s    r   __str__z_IndexedSeqFileDict.__str__   s5     TYY[)!,/s4>>2B,OOr)   c                 ,    t        | j                        S )zReturn the number of records.)lenrG   r&   s    r   __len__z_IndexedSeqFileDict.__len__   s    4==!!r)   c                 ,    t        | j                        S )Iterate over the keys.)iterrG   r&   s    r   r(   z_IndexedSeqFileDict.__iter__   s    DMM""r)   c                 @   || j                   d   k(  r| j                   d   S | j                  j                  | j                  |         }| j                  r| j	                  |j
                        }n|j
                  }||k7  rt        d| d| d      ||f| _         |S )a  Return record for the specified key.

        As an optimization when repeatedly asked to look up the same record,
        the key and record are cached so that if the *same* record is
        requested next time, it can be returned without going to disk.
        r   r   Key did not match ( vs ))rE   rA   r-   rG   rB   idr   )r'   r<   recordkey2s       r   __getitem__z_IndexedSeqFileDict.__getitem__   s     $**1--++A..s!34%%fii0D99D$;23%tD6CDD$'= r)   c                 R    | j                   j                  | j                  |         S )Return the raw record from the file as a bytes string.

        If the key is not found, a KeyError exception is raised.
        )rA   r/   rG   r'   r<   s     r   r/   z_IndexedSeqFileDict.get_raw   s"     {{""4==#566r)   c                 L    | j                   j                  j                          y)a?  Close the file handle being used to read the data.

        Once called, further use of the index won't work. The sole purpose
        of this method is to allow explicit handle closure - for example
        if you wish to delete the file, on Windows you must first close
        all open handles to that file.
        N)rA   rF   r   r&   s    r   r   z_IndexedSeqFileDict.close   s     	!!#r)   N)r0   r1   r2   r3   rM   rP   rU   rX   r(   rc   r/   r   r4   r)   r   r6   r6      s0    0 B"#(7$r)   r6   c                   P    e Zd ZdZ	 ddZd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zd Zy)_SQLiteManySeqFilesDictaK  Read only dictionary interface to many sequential record files.

    This code is used in both Bio.SeqIO for indexing as SeqRecord
    objects, and in Bio.SearchIO for indexing QueryResult objects.

    Keeps the keys, file-numbers and offsets in an SQLite database. To access
    a record by key, reads from the offset in the appropriate file and then
    parses the record into an object.

    There are OS limits on the number of files that can be open at once,
    so a pool are kept. If a record is required from a closed file, then
    one of the open handles is closed first.
    c                    t         ddlm}  |d      |t        |      }|| _        || _        || _        || _        || _        || _	        || _
        i | _        t        j                  j                  t        j                  j                  |            | _        t        j                  j#                  |      r| j%                          y| j'                          y)r8   Nr   )MissingPythonDependencyErrorz.Python was compiled without the sqlite3 module)sqlite3Biork   rS   _index_filename
_filenames_formatrB   _proxy_factoryrC   	_max_open_proxiesospathabspathdirname_relative_pathisfile_load_index_build_index)	r'   index_filename	filenamesproxy_factoryfmtr>   rI   max_openrk   s	            r   rM   z _SQLiteManySeqFilesDict.__init__  s      ?8.@   YI  .#)+
! !ggoobggoon.MN77>>.)r)   c           
      	   | j                   }| j                  }| j                  }| j                  }| j                  }t
        j                  j                  |d      }|| _        	 |j                  dd      j                         \  }t        |      | _        | j                  dk(  r|j                          t        d      d|j                  d      j                         \  }| j                  t        |      k7  r4|j                          t        d	t        |      | j                  fz        d|j                  dd
      j                         \  | _        |r;|| j                  k7  r,|j                          t        d| j                   d|       d	 |j                  dd      j                         \  }|j                         dk(  }|j                  d      j#                         D 	cg c]  }	|	d   	 c}	| _        |rt$        j&                  j)                  t$        j&                  j+                  |            }g }
| j                  D ]  }t$        j&                  j-                  |      r|
j/                  |       4|
j/                  t$        j&                  j1                  ||j3                  dt$        j&                  j4                                      |
| _        ~
|r^t7        |      t7        | j                        k7  r=|j                          t        dt7        | j                        t7        |      fz        d|r|| j                  k7  rt9        | j                  |      D ]  \  }}t$        j&                  j)                  |      t$        j&                  j)                  |      k7  sF|j                          |rLt        dt$        j&                  j)                  |      dt$        j&                  j)                  |            dt        dt$        j&                  j)                  |      dt$        j&                  j)                  |            d  || j                        s)|j                          t        d| j                   d      y# t         $ r d}Y w xY wc c}	w # t
        j:                  $ r$}|j                          t        d|       dd}~ww xY w)z8Call from __init__ to reuse an existing index (PRIVATE).F)check_same_threadz(SELECT value FROM meta_data WHERE key=?;)countzUnfinished/partial databaseNz%SELECT MAX(_ROWID_) FROM offset_data;z#Corrupt database? %i entries not %i)formatzIndex file says format z, not )filenames_relative_to_indexTRUEz0SELECT name FROM file_data ORDER BY file_number;r   /z Index file says %i files, not %iz)Index file has different filenames, e.g. z != zIndex file has different filenames [This is an old index where any relative paths were relative to the original working directory]. e.g. z Not a Biopython index database? Unsupported format 'r@   )rn   rx   ro   rp   rq   rl   dbapi2connect_conexecutefetchoneint_lengthr   r   upperr   fetchallrt   ru   rv   rw   isabsappendjoinreplaceseprW   zipOperationalError)r'   r|   relative_pathr}   r   r~   conr   r   rowtmpfoldnewerrs                  r   rz   z#_SQLiteManySeqFilesDict._load_indexG  s   --++OO	ll++nn$$^u$M	U	Q{{:Jhj U u:DL||r!		 !>?TI
 {{#JKTTVHU||s5z)		 9SZ<VV "kk:Khj T\ sdll*		 -dll^6#G
414>42 (* /,
 05576A , ;;F(*	 ADO + "0O P Aww}}Q'

1 

GGLL		#rww{{8ST #&S^s4??/CC		 64??+S^<=  Y$//9 #DOOY ? (HCwws+rwws/CC		6",#%77??3#79M!O# $((
 #-
 $&77??3#79M	!O# $(((, T\\*IIK3DLL>CDD +k  4.3+4\ '' 	QIIK?uEFDP	QsQ   &D.R- 5R 
!R- +R(7F7R- /B+R- R%!R- $R%%R- -S$ SS$c                 n   | j                   }| j                  }| j                  }| j                  }| j                  | j
                  }| j                  }| j                  }|r|st        d|       ||      st        d| d      t        j                  j                  |      }|| _        |j                  d       |j                  d       |j                  d       |j                  dd       |j                  dd	|f       |j                  dd
       |j                  d       |j                  d       d}	t        |      D ]G  \  }
t        j                   j#                  |
      }t        j                   j%                  |
      sht        j                   j%                  |      sIt        j                   j'                  |
|      j)                  t        j                   j*                  d      }nt        j                   j-                  t        j                   j#                  |
            t        j                   j*                  z   j/                  |t        j                   j*                  z         r`t        j                   j'                  |
|      j)                  t        j                   j*                  d      }|j/                  d      rJ |       |j                  d|f        |||
      }rfd|D        }nfd|D        }	 t1        t3        j4                  |d            }|sn1|j7                  d|       |j9                          |	t;        |      z  }	St;        |      |k  r||<   .|j<                  j?                          J |	| _         	 |j                  d       |j                  d       |j                  d|	df       |j9                          y# t        jB                  $ r;}|| _        | j?                          |j?                          t        d|       dd}~ww xY w)z3Call from __init__ to create a new index (PRIVATE).z0Filenames to index and format required to build r   r@   zPRAGMA synchronous=OFFzPRAGMA locking_mode=EXCLUSIVEz.CREATE TABLE meta_data (key TEXT, value TEXT);z0INSERT INTO meta_data (key, value) VALUES (?,?);)r   r   r   )r   Truez8CREATE TABLE file_data (file_number INTEGER, name TEXT);zYCREATE TABLE offset_data (key TEXT, file_number INTEGER, offset INTEGER, length INTEGER);r   r   z../z7INSERT INTO file_data (file_number, name) VALUES (?,?);c              3   >   K   | ]  \  }}} |      ||f  y wr:   r4   )r;   r<   r,   r=   
file_indexr>   s       r   r?   z7_SQLiteManySeqFilesDict._build_index.<locals>.<genexpr>  s-      -ff "#&
FFCs   c              3   2   K   | ]  \  }}}|||f  y wr:   r4   )r;   r<   r,   r=   r   s       r   r?   z7_SQLiteManySeqFilesDict._build_index.<locals>.<genexpr>  s(      -ff *ff5s   d   zIINSERT INTO offset_data (key,file_number,offset,length) VALUES (?,?,?,?);z@CREATE UNIQUE INDEX IF NOT EXISTS key_index ON offset_data(key);zDuplicate key? NzPRAGMA locking_mode=NORMALz-UPDATE meta_data SET value = ? WHERE key = ?;r   )"rn   rx   ro   rp   rB   rq   rr   rs   r   rl   r   r   r   r   	enumeratert   ru   rv   r   relpathr   r   rw   
startswithrS   	itertoolsisliceexecutemanycommitrW   rF   r   r   IntegrityError)r'   r|   r   r}   r   r~   r   random_access_proxiesr   r   r   r   rH   rK   batchr   r   r>   s                   @@r   r{   z$_SQLiteManySeqFilesDict._build_index  s   --++OO	ll))++>> $)B>BTU  S!3C5:;;nn$$^4	 	,-34 	DEFVFSVX>3	

 	NOD	
 $-i$8 0	4 J)A77==*277==3P GGOOHm<DDRWW[[RUV''//"''//(";<rww{{JVV+
 GGOOHm<DDRWW[[RUV<<.11KKIQ #0X">1D
1D Y--k3?@ _ 

U#  ()H44G%j1#++113a0	4b 	@KKR 	01CeWEUV

 %% 	@1DMJJLIIKse454?		@s   O& &P496P//P4c                     | j                   S r:   rO   r&   s    r   rP   z _SQLiteManySeqFilesDict.__repr__  s    zzr)   c                 j    t        | j                  j                  d|f      j                               S )Nz(SELECT key FROM offset_data WHERE key=?;)boolr   r   r   rf   s     r   __contains__z$_SQLiteManySeqFilesDict.__contains__  s/    II:SFhj
 	
r)   c                     | j                   S )z%Return the number of records indexed.)r   r&   s    r   rX   z_SQLiteManySeqFilesDict.__len__!  s    ||r)   c              #   l   K   | j                   j                  d      D ]  }t        |d           yw)rZ   z9SELECT key FROM offset_data ORDER BY file_number, offset;r   N)r   r   r   )r'   r   s     r   r(   z _SQLiteManySeqFilesDict.__iter__&  s7     99$$G
 	C c!f+	s   24c                 Z   | j                   j                  d|f      j                         }|st        |\  }}| j                  }||v r||   j                  |      }nt        |      | j                  k\  r+|j                         d   j                  j                          | j                  | j                  | j                  |         }|j                  |      }|||<   | j                  r| j                  |j                        }n|j                  }||k7  rt!        d| d| d      |S )z$Return record for the specified key.z8SELECT file_number, offset FROM offset_data WHERE key=?;r   r]   r^   r_   )r   r   r   KeyErrorrs   r-   rW   rr   popitemrF   r   rq   rp   ro   rB   r`   r   )	r'   r<   r   file_numberr,   proxiesra   proxyrb   s	            r   rc   z#_SQLiteManySeqFilesDict.__getitem__-  s    iiF

(* 	 N!V--'![)--f5F7|t~~-!!$,,224''dook6RSEYYv&F#(GK %%fii0D99D$;23%tD6CDDr)   c                    | j                   j                  d|f      j                         }|st        |\  }}}| j                  }||v rG|r1||   j
                  }|j                  |       |j                  |      S ||   j                  |      S t        |      | j                  k\  r+|j                         d   j
                  j                          | j                  | j                  | j                  |         }|||<   |r.|j
                  }|j                  |       |j                  |      S |j                  |      S )re   z@SELECT file_number, offset, length FROM offset_data WHERE key=?;r   )r   r   r   r   rs   rF   r   r   r/   rW   rr   r   r   rq   rp   ro   )	r'   r<   r   r   r,   r=   r   hr   s	            r   r/   z_SQLiteManySeqFilesDict.get_rawI  s!    iiNQTPV

(* 	 N&)#VV--'!K(00vvvf~%{+33F;; 7|t~~-!!$,,224''dook6RSE#(GK MMvvvf~%}}V,,r)   c                 ~    | j                   }|r/|j                         d   j                  j                          |r.yy)zClose any open file handles.r   N)rs   r   rF   r   )r'   r   s     r   r   z_SQLiteManySeqFilesDict.closen  s1    --OOa ((..0 r)   N)
   )r0   r1   r2   r3   rM   rz   r{   rP   r   rX   r(   rc   r/   r   r4   r)   r   ri   ri     sE    , + ZdELgT

8#-J1r)   ri   )r)r3   collections.abccollections
contextlibr   rt   abcr   r   rl   ImportErrorcontextmanagerr   r    r"   Mappingr6   ri   r4   r)   r   <module>r      s       	   . .bF"I3 "IJr$+//11 r$jg11 g1i  Gs   A0 0A:9A: