
    Xf_/                     h    d Z ddlmZ  G d dej        e          Z G d de          ZddZd	 Zd
S )zParsing TRANSFAC files.    )motifsc                   2     e Zd ZdZh dZh dZ fdZ xZS )Motifa  Store the information for one TRANSFAC motif.

    This class inherits from the Bio.motifs.Motif base class, as well
    as from a Python dictionary. All motif information found by the parser
    is stored as attributes of the base class when possible; see the
    Bio.motifs.Motif base class for a description of these attributes. All
    other information associated with the motif is stored as (key, value)
    pairs in the dictionary, where the key is the two-letter fields as found
    in the TRANSFAC file. References are an exception: These are stored in
    the .references attribute.

    These fields are commonly found in TRANSFAC files::

        AC:    Accession number
        AS:    Accession numbers, secondary
        BA:    Statistical basis
        BF:    Binding factors
        BS:    Factor binding sites underlying the matrix
               [sequence; SITE accession number; start position for matrix
               sequence; length of sequence used; number of gaps inserted;
               strand orientation.]
        CC:    Comments
        CO:    Copyright notice
        DE:    Short factor description
        DR:    External databases
               [database name: database accession number]
        DT:    Date created/updated
        HC:    Subfamilies
        HP:    Superfamilies
        ID:    Identifier
        NA:    Name of the binding factor
        OC:    Taxonomic classification
        OS:    Species/Taxon
        OV:    Older version
        PV:    Preferred version
        TY:    Type
        XX:    Empty line; these are not stored in the Record.

    References are stored in an .references attribute, which is a list of
    dictionaries with the following keys::

        RN:    Reference number
        RA:    Reference authors
        RL:    Reference data
        RT:    Reference title
        RX:    PubMed ID

    For more information, see the TRANSFAC documentation.
    >   BFBSCCDRDTHCHPOV>   RARLRTRXc                     	 t                                          |          }n=# t          $ r0 t          t          j        |                               |          }Y nw xY w|S )N)super__getitem__	TypeErrorr   r   )selfkeyvalue	__class__s      3lib/python3.11/site-packages/Bio/motifs/transfac.pyr   zMotif.__getitem__F   sd    	?GG'',,EE 	? 	? 	?&,--99#>>EEE	?s   !% 7AA)__name__
__module____qualname____doc__multiple_value_keysreference_keysr   __classcell__)r   s   @r   r   r      s_        0 0d KJJ .--N            r   c                       e Zd ZdZd Zd ZdS )RecordzStore the information in a TRANSFAC matrix table.

    The record inherits from a list containing the individual motifs.

    Attributes:
     - version - The version number, corresponding to the 'VV' field
       in the TRANSFAC file;

    c                     d| _         dS )zInitialize the class.N)versionr   s    r   __init__zRecord.__init__Y   s    r"   c                      t          |           S )z'Turn the TRANSFAC matrix into a string.)writer'   s    r   __str__zRecord.__str__]   s    T{{r"   N)r   r   r   r   r(   r+    r"   r   r$   r$   N   s<               r"   r$   Tc                 B	   i }g }d}t                      }| D ]}|                                }|s|                    dd          }|d                                         }|r&t          |          dk    rt	          d| d          t          |          dk    rJ|d                                         }	|r.|                    d          d         st	          d| d          |d	k    r	|	|_        n+|d
v r&i }|	                                dd         g dk    rt	          d| d|           d}
dD ]}g ||<   | D ]}|                                }|                    dd          }|d                                         }t          |          dk    rJ|d                                         }	|r.|                    d          d         st	          d| d          	 t          |          }n# t          $ r Y  nw xY w|
dk    r|dk    r|rt	          d|           n|
dz  }
||
k    rt	          d| d          |rUt          |          dk    rt	          d|dd|dd|dd          t          |          dk    rt	          d| d          |	                                dd         }t          |          dk    rt	          d| d          t          d|          D ]-\  }}||         	                    t          |                     .|dk    r|dk    r|	                    d          \  }}}|d         dk    rt	          d| d | d          |d!         d"k    rt	          d| d#| d          t          |dd!                   }t          |          |dz
  k    r,t	          d|dd$t          |          dz   dd|dd          ||	i}|	                    |           |d%k    rJ|Bt          d|&          }|                    |           ||_        |	                    |           i }g }8|t          j        v r|	||<   M|t          j        v r&||vrg ||<   ||         	                    |	           |	||<   |S )'z4Parse a transfac format handle into a Record object.N   r      zAThe key value of a TRANSFAC motif line should have 2 characters:""  zKA TRANSFAC motif line should have 2 spaces between key and value columns: "VV)P0PO   )ACGTzA TRANSFAC matrix "z(" line should be followed by "A C G T": ACGTzaA TRANSFAC matrix should start with "01" as first row of the matrix, but this matrix uses "00": "zKThe TRANSFAC matrix row number does not match the position in the matrix: "zLA TRANSFAC matrix line should have a 2 digit key at the start of the line ("02dz"), but this matrix uses "dz": "sz".z7A TRANSFAC matrix line should have a key and a value: "zQA TRANSFAC matrix line should have a value for each nucleotide (A, C, G and T): "XXRN;[zThe index "z2" in a TRANSFAC RN line should start with a "[": "]z0" in a TRANSFAC RN line should end with a "]": "zP" of the TRANSFAC RN line does not match the current number of seen references "//)alphabetcounts)r$   stripsplitlen
ValueError	partitionr&   intzipappendfloatr   update
referencesr    r   )handlestrictannotationsrQ   rF   recordline	key_valuer   r   lengthcivaluesvindex	separator	accession	referencemotifs                       r   readrb   b   s   KJFXXF x% x%zz|| 	JJtQ''	l  "" 	3xx1}}          y>>QaL&&((E ~~d++A. $$ $ $ $  
 $;;"FNNL  F{{}}RaR $8$8$888 5# 5 5.25 5   F  q		 0/ 0/zz|| JJtQ//	l((**y>>Q&&%aL..00E #~~d33A6 ",!LDH!L !L !L# # CAA!   EEQ;;166 (RKOR R   aKF;;$4,04 4 4    3xx1}}(J?@IJ J67IJ J?CIJ J J  
 9~~**(0(,0 0 0   rr*v;;!##$A9=A A A    // / /DAq1I$$U1XX..../4<<D[[*///#*>*>'E9iQx3 -% - -%)- - -   RyC -% - -%)- - -   ad$$E:%!)++ =%N = =J!+<= =37<= = =  
 eIi((((D[[!vf===[)))#- e$$$KJJE((("IcNNE---+%%#%C ##E****$KMs   &G66
HHc           
         g }	 | j         }|d|z  }|                    |           n# t          $ r Y nw xY wt          j        }d}| D ]!g }|D ]}d}|D ]}	|	dk    r׉j        }
|
dk    rj        }t          j                  }d	                    dg|z             }|                    |           t          |
          D ]md	                    d	gd
 |D             z             dz   }|t          dz   gfd|D             z   |         gz             z  }|                    |           nd}no	                     |	          }n# t          $ r d}Y nw xY w|D|	|v r"|D ]}|	 d| }|                    |           n|	 d| }|                    |           d}|	dk    r]	 j        }d}|D ]=}|D ]8}	|                    |	          }||	 d| }|                    |           d}9># t          $ r Y w xY w|rd}|                    |           d}|                    |           d	                    |          dz   }|                    |           #d	                    |          }|S )z7Write the representation of a motif in TRANSFAC format.NzVV  %s
XX
//
))ACAS)ID)r
   CO)NA)DE)TY)OSOC)r   r   )r   )r3   )BA)r   )r   )r	   )r   PVFr3   r   z       z%02.dc                     g | ]}d S )z%6.20gr,   ).0_s     r   
<listcomp>zwrite.<locals>.<listcomp>  s    1L1L1Lq(1L1L1Lr"   z      %sr.   c                 6    g | ]}j         |                  S r,   )rF   )rq   rr   rZ   ra   s     r   rs   zwrite.<locals>.<listcomp>  s$    CCCau|Aq1CCCr"   Tr1   rn   )r?   r   r   r   r   r>   rD   
 )r&   rN   AttributeErrorr   r   rX   degenerate_consensussortedrE   joinrangetuplegetrQ   )r   blocksr&   blockr   sectionslinessectionblankr   rX   sequencelettersrV   r   r\   rQ   keysr`   textrZ   ra   s                       @@r   r*   r*      s   F!. 
   MM%         3H"  @ @ 9	# 9	#GE 4- 4-$;;"\F{{ $9H$U^44G#==$')9::DLL&&&"6]] 
+ 
+HHgY1L1LG1L1L1L%LMM()   $eUGCCCCC7CCCD'{m,' '  
 T**** EE% %		#) % % % $%("555%* 3 3*-}}}} %T 2 2 2 23 '*#4#4U#4#4D!LL... $$;;-%*%5
  >)3 - -I'+ - -(1c(:(:#(=$,*-'8'8'8'8 %T 2 2 2(,--	 *   	    #T"""T		%  4'e776??DKs0   * 
779EE	E	.G99
H	H	N)T)	r   Bior   r   dictlistr$   rb   r*   r,   r"   r   <module>r      s           > > > > >FL$ > > >B    T   (   Dg g g g gr"   