
    Xf'              
          d Z ddlZddlmZ ddlmZ ddlZ	 ddlZn# e	$ r ddl
mZ  ed          dw xY wddlmc mZ ddlmZ ddlmZ  ej        d	           ej        d
           ej        d           ej        d           ej        d           ej        d           ej        d           ej        d          dZd Zd Zd Zd Zd Zd Zd ZeeeeeeedZd Z G d d          ZdS )z6
A module to interact with BinaryCIF-formatted files.
    N)deque)Optional)MissingPythonDependencyErrorzCInstall msgpack to use Bio.PDB.binaryCIF (e.g. pip install msgpack))	Structure)StructureBuilderz<i1z<i2<i4z<u1z<u2<u4z<f4z<f8)                      !   c                    | d         d         d         }|d         dk    sJ t           |d                  }t          j        | d         d         |          | d         d<   | d         d                                          d S )Ndataencodingkind	ByteArraytype)_dtypesnp
frombufferpop)columnr   dtypes      2lib/python3.11/site-packages/Bio/PDB/binary_cif.py_byte_array_decoderr    (   s~    f~j)"-HF{****HV$%E]6&>&+A5IIF6N6
6N:""$$$$$    c                 l   | d         d         d         }|d         dk    sJ t           |d                  }|d         }| d         d         }|j        j        t          j        t          j        fv sJ t          j        |||          }|| d         d<   | d         d                                          d S )	Nr   r   r   r   
FixedPointsrcTypefactorr   )r   r   r   r   int32uint32divider   )r   r   r   r%   r   decoded_datas         r   _fixed_point_decoderr+   1   s    f~j)"-HF|++++HY'(EhF&>&!D:?rx333339T6777L)F6N6
6N:""$$$$$r!   c                    | d         d         d         }|d         dk    sJ |d         }|d         }|d         }|||d	z
  z  z
  }| d         d         }t           |d
                  }t          j        |t          j        |||          |          }|| d         d<   | d         d                                          d S )Nr   r   r   r   IntervalQuantizationminmax	num_stepsr
   r$   r&   )r   r   addmultiplyr   )	r   r   min_valmax_valr0   deltar   r   r*   s	            r   _interval_quantization_decoderr6   ?   s    f~j)"-HF55555uoGuoG%IgQ//E&>&!DHY'(E6'2;tU%#H#H#HPUVVVL)F6N6
6N:""$$$$$r!   c                    | d         d         d         }|d         dk    sJ | d         d         }t           |d                  }t          j        |d d d                             |          |dd d                   }t	          |          |d	         k    sJ || d         d<   | d         d                                          d S )
Nr   r   r   r   	RunLengthr$   r   r
   srcSize)r   r   repeatastypelenr   )r   r   r   r   r*   s        r   _run_length_decoderr=   O   s    f~j)"-HF{****&>&!DHY'(E9T##A#Y--e44d14a4jAAL| 33333)F6N6
6N:""$$$$$r!   c                 h   | d         d         d         }|d         dk    sJ t           |d                  }| d         d         }|                    |d          }|d	xx         |d
         z  cc<   |                    |           || d         d<   | d         d                                          d S )Nr   r   r   r   Deltar$   F)copyr   origin)out)r   r;   cumsumr   )r   r   r   r   r*   s        r   _delta_decoderrD   \   s    f~j)"-HFw&&&&HY'(E&>&!D;;u5;11LOOOx))OOOL))))F6N6
6N:""$$$$$r!   c                    | d         d         d         }|d         dk    sJ |d         }|d         }|d         }|rt          j        d	          }nt          j        d
          }| d         d         }||j        j        k    sJ t          j        |j        t           j                  |k    sJ t          j        |f|          }t          j        ||           || d         d<   | d         d                                          d S )Nr   r   r   r   IntegerPacking	byteCountr9   
isUnsignedr	   r   )	r   r   itemsize
issubdtypeunsignedintegerempty_bcif_helperinteger_unpackr   )r   r   
byte_countsrc_sizeis_unsignedr   r   r*   s           r   _integer_packing_decoderrR   j   s	   f~j)"-HF/////+&J	"H<(K  &>&!D,,,,,=R%788KGGGG8XK//Ll333)F6N6
6N:""$$$$$r!   c                 "   | d         d         d         }|d         dk    sJ d|d         |d         di}d| d         d         |d	         di}|d
         }t          |          }t          j        t          |          dz
  ft                    }t          t          |                    D ] }|||         ||dz                     }|||<   !t          |          }	||	         | d         d<   | d         d                                          d S )Nr   r   r   r   StringArrayoffsetsoffsetEncoding)r   r   dataEncoding
stringDatar
   r&   )_decoder   rL   r<   objectranger   )
r   r   offsets_columnlookup_columnstring_datarU   unique_stringsindexunique_stringlookupss
             r   _string_array_decoderrc      s@   f~j)"-HF},,,, 	Y' !12
 
N 	6N6* 0
 
M <(Kn%%GXs7||a/1@@@Ns>**++ . .#GENWUQY5G$GH -um$$G+G4F6N6
6N:""$$$$$r!   )r   r#   r-   r8   r?   rF   rT   c                     t          | d         d                   }|| d         d<   |r%|d         }t          |d                  |            |%| d         d         S )Nr   r   r   r   )r   	_decoders)r   	encodingsr   s      r   rY   rY      sj    fVnZ011I!*F6N:
 ,R=(6"#F+++  , &>&!!r!   c                   X    e Zd ZdZd ZdededefdZd Zd Zd	e	e         d
ede
fdZdS )BinaryCIFParserzrA parser for BinaryCIF files.

    See the `BinaryCIF specification <https://github.com/molstar/BinaryCIF>`_.
    c                 ,    t                      | _        dS )zInitialize a BinaryCIF parser.N)r   _structure_builder)selfs    r   __init__zBinaryCIFParser.__init__   s    "2"4"4r!   
atom_groupcomponent_idreturnc                 (    |dk    r	|dv rdnd}nd}|S )NHETATM)HOHWATWH  )rk   rm   rn   hetero_fields       r   _get_hetero_fieldz!BinaryCIFParser._get_hetero_field   s-    !!".."@"@33cLLLr!   c                 4    t          |d                   }t          |d                   } fdt          ||          D             }d t          |d                   D             }t          |d                   }t          t          |||                    S )Nz_atom_site.group_PDB_atom_site.label_comp_idc                 B    g | ]\  }}                     ||          S rw   )ry   ).0rm   rn   rk   s      r   
<listcomp>z4BinaryCIFParser._get_residue_ids.<locals>.<listcomp>   s=     
 
 
(
L "":|<<
 
 
r!   c                     g | ]}|pd S rv   rw   )r}   codes     r   r~   z4BinaryCIFParser._get_residue_ids.<locals>.<listcomp>   s)     
 
 
 DKC
 
 
r!   z_atom_site.pdbx_PDB_ins_codez_atom_site.auth_seq_id)rY   ziplist)rk   columnsatom_groupscomponent_idshetero_fieldsinsertion_codessequence_idss   `      r   _get_residue_idsz BinaryCIFParser._get_residue_ids   s    g&<=>>(B CDD
 
 
 
,/],K,K
 
 

 
$+G4R,S$T$T
 
 
 w'?@AAC|_EEFFFr!   c                 ,  	
 t          |d                   t          |d                   }t          |d                   }t          |d                   }t          j        |||fd          t          |d                   t          |d                   	d	 t          |d
                   D             t          |d                   
t          |d                   	
fdt          t	          
                    D             S )Nz_atom_site.label_atom_idz_atom_site.Cartn_xz_atom_site.Cartn_yz_atom_site.Cartn_zr
   )axisz_atom_site.B_iso_or_equivz_atom_site.occupancyc                 0    g | ]}t          |pd           S r   )str)r}   alt_ids     r   r~   z.BinaryCIFParser._get_atoms.<locals>.<listcomp>   s1     
 
 
#)C#
 
 
r!   z_atom_site.label_alt_idz_atom_site.idz_atom_site.type_symbolc                     g | ]=}|         |         |         |         |         |         |         |         d >S ))namecoordb_factor	occupancyaltlocfullnameserial_numberelementrw   )	r}   r`   alt_ids	b_factorscoordinates_listnamesoccupanciesserial_numberstype_symbolss	     r   r~   z.BinaryCIFParser._get_atoms.<locals>.<listcomp>   so     
 
 
  e)%0%e,(/!%.!%L!/!6'.	 	
 
 
r!   )rY   r   stackr[   r<   )rk   r   x_listy_listz_listr   r   r   r   r   r   r   s        @@@@@@@r   
_get_atomszBinaryCIFParser._get_atoms   sE    :;<<!5677!5677!56778VVV$<1EEEG$?@AA	g&<=>>
 
-4W=V5W-X-X
 
 
 !!9::w'?@AA
 
 
 
 
 
 
 
 
 
 s>2233
 
 
 	
r!   idsourcec                 ^   t          |d          r|                    d           |                    d          rt          j        |d          nt	          |d          5 }t          j        |d          }ddd           n# 1 swxY w Y   d	 |d
         D             }t          |d                   }t          |d                   }|                     |          }t          |d                   }	| 	                    |          }
t          |d                   d         }| j
                            |p|           d}d}d}d}d}t          t          |                    D ]}||         }||         }||         }|	|         }||k    r&| j
                            ||           |dz  }|}d}d}||k    r| j
                            |           |}d}||k    s||k    r | j
        j        |g|R   |}|} | j
        j        di |
|          | j
                                        S )a  Parse and return the PDB structure from a BinaryCIF file.

        :param str id: the PDB code for this structure
        :param str source: the path to the BinaryCIF file
        :return: the PDB structure
        :rtype: Bio.PDB.Structure.Structure
        seekr   z.gzrb)modeT)use_listNc                 b    i | ],}|d          D ]!}|d         D ]}|d          d|d          |"-S )
categoriesr   r   .rw   )r}   
data_blockcategoryr   s       r   
<dictcomp>z1BinaryCIFParser.get_structure.<locals>.<dictcomp>  sy     
 
 
&|4
 
 "9-	
 
  22&.22F
 
 
 
 
r!   
dataBlocksz_atom_site.pdbx_PDB_model_numz_atom_site.label_asym_idr{   z	_entry.idr
   rw   )hasattrr   endswithgzipopenmsgpackunpackrY   r   r   rj   init_structurer[   r<   
init_model
init_chaininit_residue	init_atomget_structure)rk   r   r   fileresultr   atom_model_numbersatom_chain_idsatom_residue_idsatom_component_idsatomsentry_idbuilder_model_countbuilder_model_numberbuilder_chain_idbuilder_residue_idbuilder_component_idr`   model_numberchain_id
residue_idrn   s                         r   r   zBinaryCIFParser.get_structure   s    66"" 	KKNNN u%%)DIf4((((f4(((	9 ^D4888F	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9 	9
 
$\2
 
 
 %W-L%MNN )C!DEE0099$W-G%HII((7;/003..r~X>>>#!#312233 	> 	>E-e4L%e,H)%0J-e4L333'223FUUU#q(#'3$#' %)"+++'228<<<#+ %)"///<CW3W3W4'4\OJOOOO%/"'3$-D#-==e====&44666s   "BB	B	N)__name__
__module____qualname____doc__rl   r   ry   r   r   r   r   r   rw   r!   r   rh   rh      s         
5 5 5C s s    G G G
 
 
8?7 ?7s ?7y ?7 ?7 ?7 ?7 ?7 ?7r!   rh   )r   r   collectionsr   typingr   numpyr   r   ImportErrorBior   Bio.PDB._bcif_helperPDBrM   Bio.PDB.Structurer   Bio.PDB.StructureBuilderr   r   r   r    r+   r6   r=   rD   rR   rc   re   rY   rh   rw   r!   r   <module>r      s0                    NNNN   000000
&
&M  , + + + + + + + + ' ' ' ' ' ' 5 5 5 5 5 5 rxrxrxrxrxrx	 	% % %% % %% % % 
% 
% 
%% % %% % %.% % %> %&:$.( 		" 	" 	"{7 {7 {7 {7 {7 {7 {7 {7 {7 {7s    5