
    Xf,                     6    d Z d Zd Z G d d          Zd ZdS )a~  Parser for the prosite dat file from Prosite at ExPASy.

See https://www.expasy.org/prosite/

Tested with:
 - Release 20.43, 10-Feb-2009
 - Release 2017_03 of 15-Mar-2017.

Functions:
 - read                  Reads a Prosite file containing one Prosite record
 - parse                 Iterates over records in a Prosite file.

Classes:
 - Record                Holds Prosite data.

c              #   8   K   	 t          |           }|sdS |V  )zParse Prosite records.

    This function is for parsing Prosite files containing multiple
    records.

    Arguments:
     - handle   - handle to the file.

    TN)__read)handlerecords     2lib/python3.11/site-packages/Bio/ExPASy/Prosite.pyparser      s1       	E	    c                 n    t          |           }|                                 }|rt          d          |S )zRead one Prosite record.

    This function is for parsing Prosite files containing
    exactly one record.

    Arguments:
     - handle   - handle to the file.

    z"More than one Prosite record found)r   read
ValueError)r   r   	remainders      r   r
   r
   +   s8     F^^FI ?=>>>Mr   c                       e Zd ZdZd ZdS )Recordag  Holds information from a Prosite record.

    Main attributes:
     - name           ID of the record.  e.g. ADH_ZINC
     - type           Type of entry.  e.g. PATTERN, MATRIX, or RULE
     - accession      e.g. PS00387
     - created        Date the entry was created.  (MMM-YYYY for releases
       before January 2017, DD-MMM-YYYY since January 2017)
     - data_update    Date the 'primary' data was last updated.
     - info_update    Date data other than 'primary' data was last updated.
     - pdoc           ID of the PROSITE DOCumentation.
     - description    Free-format description.
     - pattern        The PROSITE pattern.  See docs.
     - matrix         List of strings that describes a matrix entry.
     - rules          List of rule definitions (from RU lines).  (strings)
     - prorules       List of prorules (from PR lines). (strings)

    NUMERICAL RESULTS:
     - nr_sp_release  SwissProt release.
     - nr_sp_seqs     Number of seqs in that release of Swiss-Prot. (int)
     - nr_total       Number of hits in Swiss-Prot.  tuple of (hits, seqs)
     - nr_positive    True positives.  tuple of (hits, seqs)
     - nr_unknown     Could be positives.  tuple of (hits, seqs)
     - nr_false_pos   False positives.  tuple of (hits, seqs)
     - nr_false_neg   False negatives.  (int)
     - nr_partial     False negatives, because they are fragments. (int)

    COMMENTS:
     - cc_taxo_range  Taxonomic range.  See docs for format
     - cc_max_repeat  Maximum number of repetitions in a protein
     - cc_site        Interesting site.  list of tuples (pattern pos, desc.)
     - cc_skip_flag   Can this entry be ignored?
     - cc_matrix_type
     - cc_scaling_db
     - cc_author
     - cc_ft_key
     - cc_ft_desc
     - cc_version     version number (introduced in release 19.0)

    The following are all lists if tuples (swiss-prot accession, swiss-prot name).

    DATA BANK REFERENCES:
     - dr_positive
     - dr_false_neg
     - dr_false_pos
     - dr_potential   Potential hits, but fingerprint region not yet available.
     - dr_unknown     Could possibly belong
     - pdb_structs    List of PDB entries.

    c                    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        g | _	        g | _
        g | _        g | _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        g | _        d| _        g | _        g | _        g | _        g | _        g | _        g | _        dS )zInitialize the class. )NNN)nametype	accessioncreateddata_updateinfo_updatepdocdescriptionpatternmatrixrulesprorulespostprocessingnr_sp_release
nr_sp_seqsnr_totalnr_positive
nr_unknownnr_false_posnr_false_neg
nr_partialcc_taxo_rangecc_max_repeatcc_sitecc_skip_flagdr_positivedr_false_negdr_false_posdr_potential
dr_unknownpdb_structs)selfs    r   __init__zRecord.__init__q   s    			
 $'&( r   N)__name__
__module____qualname____doc__r1    r   r   r   r   =   s.        1 1f% % % % %r   r   c           	      z   dd l }d }| D ]}|d d         |dd                                          }}|dk    rvt                      }|                    d          }t	          |          dk    rt          d|           |d         |_        |d                             d          |_        |d	k    r|                    d
          |_        |dk    r(|                    d                              d          }|d         	                    d          r(|d         
                    dd          d         |_        nt          d|           |d         	                    d          r(|d         
                    dd          d         |_        nt          d|           |d         	                    d          r)|d         
                    dd          d         |_        t          d|           |dk    r	||_        |dk    r|xj        |z  c_        |dk    r|j                            |           >|dk    r/|j                            |                    d
                     s|dk    r|j                            |           |dk    rg|                    d
          }|D ]L}|sd |                    d          D             \  }	}
|	dk    r4|
                    d          \  }}||_        t-          |          |_        b|	dk    rt-          |
          |_        }|	dk    rt-          |
          |_        |	dv r|                    d|
          }|st7          d|
 d |          t9          t;          t,          |                                                    }|	d!k    r	||_        |	d"k    r	||_         |	d#k    r	||_!        *|	d$k    r||_"        9t          d%|	 d |          |d&k    rF|                    d
          }|D ]+}|r|d d'         d(k    r|#                    d          dk    r.d) |                    d          D             \  }	}
|	d*k    r|
|_$        ^|	d+k    r|
|_%        l|	d,k    rB|
                    d          \  }}|j&                            t-          |          |f           |	d-k    r|
|_'        |	d.k    r|
|_(        |	d/k    r|
|_)        |	d0k    r|
|_*        |	d1k    r|
|_+        |	d2k    r	|
|_,        	|	d3k    r	|
|_-        t          d%|	 d |          P|d4k    r|                    d
          }|D ]}|sd5 |                    d          D             \  }}}|d6k    r|j.                            ||f           K|d7k    r|j/                            ||f           n|d8k    r|j0                            ||f           |d9k    r|j1                            ||f           |d:k    r|j2                            ||f           t          d;|           Y|d<k    rH|                                }|D ]/}|j3                            |                    d
                     0|d=k    r1|                    d
          }|j4                            |           |d>k    r|                    d
          |_5         |d?k    r|s
 nt          d@| dA          d S |st          dB          |S )CN          IDz; z'I don't understand identification line
   .AC;DT)z
 (CREATED)z CREATED zI don't understand date line
)z (DATA UPDATE)z DATA UPDATE)z (INFO UPDATE)z INFO UPDATEDEPAMAPPRUNRc              3   >   K   | ]}|                                 V  d S Nlstrip.0words     r   	<genexpr>z__read.<locals>.<genexpr>   *      GGdkkmmGGGGGGr   =z/RELEASE,z
/FALSE_NEGz/PARTIAL)/TOTAL	/POSITIVE/UNKNOWN
/FALSE_POSz(\d+)\((\d+)\)zBroken data z in comment line
rS   rT   rU   rV   zUnknown qual CC   zAutomatic scalingc              3   >   K   | ]}|                                 V  d S rI   rJ   rL   s     r   rO   z__read.<locals>.<genexpr>   rP   r   z/TAXO-RANGEz/MAX-REPEATz/SITEz
/SKIP-FLAGz/MATRIX_TYPEz/SCALING_DBz/AUTHORz/FT_KEYz/FT_DESCz/VERSIONDRc              3   >   K   | ]}|                                 V  d S rI   )striprL   s     r   rO   z__read.<locals>.<genexpr>  s*      "K"KD4::<<"K"K"K"K"K"Kr   TFNP?zI don't understand type flag 3DPRDOz//zUnknown keyword z foundzUnexpected end of stream.)6rerstripr   splitlenr   r   r   r   endswithrsplitr   r   r   r   r   r   appendr   extendr   r   intr   r$   r%   match	Exceptiontuplemapgroupsr    r!   r"   r#   countr&   r'   r(   r)   cc_matrix_typecc_scaling_db	cc_author	cc_ft_key
cc_ft_desc
cc_versionr*   r,   r+   r-   r.   r/   r   r   )r   re   r   linekeywordvaluecolsdatescolqualdatareleaseseqsmhitsposdescrefsrefaccr   r   idr   s                           r   r   r      s'   IIIF Q Qbqb48??#4#4d??XXF;;t$$D4yyA~~ !RD!R!RSSSq'FKq'..--FKK__$||C00F__ LL%%++D11EQx  !;<< J!&qa!8!8!; !H$!H!HIIIQx  !CDD J%*1X__S!%<%<Q%?"" !H$!H!HIIIQx  !CDD J%*1X__S!%<%<Q%?"" !H$!H!HIII__!&F__NNe#NNN__M  ''''__!((S)9)9::::__L&&&&__;;s##D W W GG		#GGG
d:%%$(JJsOOMGT+2F((+D		F%%\))*-d))F''Z''(+D		F%%NNN!2D99A Y'(Wt(W(Wt(W(WXXX S!((**!5!566Dx''*.,,-1**++,0))--.2+$%UT%U%UT%U%UVVV5W6 __ ;;s##D "W "W c#2#h*=== 99S>>Q&& GG		#GGG
d=((+/F((]**+/F((W__ $

3ICN))3s88T*:;;;;\))*.F''^++,0F))]**+/F((Y&&'+F$$Y&&'+F$$Z''(,F%%Z''(,F%%$%UT%U%UT%U%UVVVE"WF __;;s##D M M "K"KCIIcNN"K"K"KT43;;&--sDk::::S[['..T{;;;;S[['..T{;;;;S[['..T{;;;;S[[%,,c4[9999$%KT%K%KLLLM  __;;==D : :"))"))C..9999:__KK$$EO""5))))__,,s++FKK__ E????@@@ 64555Mr   N)r5   r   r
   r   r   r6   r   r   <module>r      s|    $  "  $Y Y Y Y Y Y Y Y~X X X X Xr   