
    Xf=                         d Z ddlZg dZg dZg dZg dZg dZg dZg d	Zd
 Z	d Z
d Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd ZefdZefdZd Zedk    rddlmZ  ed           dS dS )a  Parsers for the GAF, GPA and GPI formats from UniProt-GOA.

Uniprot-GOA README + GAF format description:
ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/UNIPROT/README

Gene Association File, GAF formats:
http://geneontology.org/docs/go-annotation-file-gaf-format-2.2/
http://geneontology.org/docs/go-annotation-file-gaf-format-2.1/
http://geneontology.org/docs/go-annotation-file-gaf-format-2.0/

Gene Product Association Data  (GPA format) README:
http://geneontology.org/docs/gene-product-association-data-gpad-format/

Gene Product Information (GPI format) README:
http://geneontology.org/docs/gene-product-information-gpi-format/

Go Annotation files are located here:
ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/
    N)DBDB_Object_IDDB_Object_Symbol	QualifierGO_IDDB:ReferenceEvidenceWithAspectDB_Object_NameSynonymDB_Object_TypeTaxon_IDDateAssigned_ByAnnotation_ExtensionGene_Product_Form_ID)r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   r   )r   r   r   r   r   zEvidence coder
   Interacting_taxon_IDr   Assigned_byr   Spliceform_ID)r   r   r   r   r   ECO_Evidence_coder
   r   r   r   zAnnotation ExtensionAnnotation_Properties)r   	DB_subsetr   r   r   DB_Object_Synonymr   TaxonAnnotation_Target_SetAnnotation_CompletedParent_Object_ID)	r   r   r   r   r   r   r   DB_XrefGene_Product_Properties)
r   r   r   r   r   r   r   r   r   r    c              #   f  K   | D ]}|d         dk    r|                     d                              d          }t          |          dk    rK|d                             d          |d<   |d                             d          |d<   t          t	          t
          |                    V  d	S )
zRead GPI 1.0 format files (PRIVATE).

    This iterator is used to read a gp_information.goa_uniprot
    file which is in the GPI 1.0 format.
    r   !
	      |   N)rstripsplitlendictzipGPI10FIELDShandleinlineinrecs      /lib/python3.11/site-packages/Bio/UniProt/GOA.py_gpi10iteratorr4      s        , ,!9d##))$//u::??8>>#&&a8>>#&&a3{E**++++++, ,    c              #     K   | D ]}|d         dk    r|                     d                              d          }t          |          dk    rK|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   t          t	          t
          |                    V  dS )zRead GPI 1.1 format files (PRIVATE).

    This iterator is used to read a gp_information.goa_uniprot
    file which is in the GPI 1.1 format.
    r   r"   r#   r$   r%      r'         r(   N)r)   r*   r+   r,   r-   GPI11FIELDSr/   s      r3   _gpi11iteratorr;              
, 
,!9d##))$//u::??8>>#&&a8>>#&&a8>>#&&a8>>#&&a3{E**++++++
, 
,r5   c              #     K   | D ]}|d         dk    r|                     d                              d          }t          |          dk    rK|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   t          t	          t
          |                    V  dS )zRead GPI 1.2 format files (PRIVATE).

    This iterator is used to read a gp_information.goa_uniprot
    file which is in the GPI 1.2 format.
    r   r"   r#   r$   r%   r8   r'      r(   	   N)r)   r*   r+   r,   r-   GPI12FIELDSr/   s      r3   _gpi12iteratorrA      r<   r5   c                    |                                  }|                                dk    rt          |           S |                                dk    rt          |           S |                                dk    rt	          |           S |                                dk    rt          d          t          d| d          )zRead GPI format files.

    This function should be called to read a
    gp_information.goa_uniprot file. At the moment, there is
    only one format, but this may change, so
    this function is a placeholder a future wrapper.
    z!gpi-version: 1.2z!gpi-version: 1.1z!gpi-version: 1.0z!gpi-version: 2.1z1Sorry, parsing GPI version 2 not implemented yet.zUnknown GPI version r#   )readlinestriprA   r;   r4   NotImplementedError
ValueErrorr0   r1   s     r3   gpi_iteratorrH      s     __F||~~,,,f%%%	.	.	.f%%%	.	.	.f%%%	.	.	. ""UVVV::::;;;r5   c              #     K   | D ]}|d         dk    r|                     d                              d          }t          |          dk    rK|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   t          t	          t
          |                    V  dS )zRead GPA 1.0 format files (PRIVATE).

    This iterator is used to read a gp_association.*
    file which is in the GPA 1.0 format. Do not call directly. Rather,
    use the gpaiterator function.
    r   r"   r#   r$   r%   r7   r'   r>      
   N)r)   r*   r+   r,   r-   GPA10FIELDSr/   s      r3   _gpa10iteratorrM              
, 
,!9d##))$//u::??8>>#&&a8>>#&&a8>>#&&a"IOOC((b	3{E**++++++
, 
,r5   c              #     K   | D ]}|d         dk    r|                     d                              d          }t          |          dk    rK|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   t          t	          t
          |                    V  dS )zRead GPA 1.1 format files (PRIVATE).

    This iterator is used to read a gp_association.goa_uniprot
    file which is in the GPA 1.1 format. Do not call directly. Rather
    use the gpa_iterator function
    r   r"   r#   r$   r%   r7   r'   r>   rJ   rK   N)r)   r*   r+   r,   r-   GPA11FIELDSr/   s      r3   _gpa11iteratorrQ      rN   r5   c                     |                                  }|                                dk    rt          |           S |                                dk    rt          |           S t	          d| d          )zRead GPA format files.

    This function should be called to read a
    gene_association.goa_uniprot file. Reads the first record and
    returns a gpa 1.1 or a gpa 1.0 iterator as needed
    z!gpa-version: 1.1z!gpa-version: 1.0zUnknown GPA version r#   )rC   rD   rQ   rM   rF   rG   s     r3   gpa_iteratorrS     sq     __F||~~,,,f%%%	.	.	.f%%%::::;;;r5   c              #     K   | D ]}|d         dk    r|                     d                              d          }t          |          dk    rL|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   |d                             d          |d<   t          t	          t
          |                    V  d S Nr   r"   r#   r$   r%   r8   r'   r&   r9   rK      )r)   r*   r+   r,   r-   GAF20FIELDSr/   s      r3   _gaf20iteratorrX            , ,!9d##))$//u::??8>>#&&a8>>#&&a8>>#&&a"IOOC((b	"IOOC((b	3{E**++++++, ,r5   c              #     K   | D ]}|d         dk    r|                     d                              d          }t          |          dk    rL|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   |d                             d          |d<   t          t	          t
          |                    V  d S rU   )r)   r*   r+   r,   r-   GAF10FIELDSr/   s      r3   _gaf10iteratorr\   .  rY   r5   c              #     K   d }g }| D ]T}|d         dk    r|                     d                              d          }t          |          dk    rL|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   |d                             d          |d<   t          t	          t
          |                    }|d         |k    r'|r%t          j        |          }|g}|d         }|V  7|d         }|                    |           Vd S Nr   r"   r#   r$   r%   r8   r'   r&   r9   rK   rV   r   )r)   r*   r+   r,   r-   r[   copyappendr0   cur_idid_rec_listr1   r2   cur_recret_lists          r3   _gaf10byproteiniteratorrf   =  a     FK ( (!9d##))$//u::??8>>#&&a8>>#&&a8>>#&&a"IOOC((b	"IOOC((b	s;..//>"f,,,y--H")K^,FNNNN^,Fw'''''( (r5   c              #     K   d }g }| D ]T}|d         dk    r|                     d                              d          }t          |          dk    rL|d                             d          |d<   |d                             d          |d<   |d	                             d          |d	<   |d
                             d          |d
<   |d                             d          |d<   t          t	          t
          |                    }|d         |k    r'|r%t          j        |          }|g}|d         }|V  7|d         }|                    |           Vd S r^   )r)   r*   r+   r,   r-   rW   r_   r`   ra   s          r3   _gaf20byproteiniteratorri   V  rg   r5   c                    |                                  }|                                dk    rt          |           S |                                dk    rt          |           S |                                dk    rt          |           S |                                dk    rt          |           S t	          d| d          )a  Iterate over records in a gene association file.

    Returns a list of all consecutive records with the same DB_Object_ID
    This function should be called to read a
    gene_association.goa_uniprot file. Reads the first record and
    returns a gaf 2.0 or a gaf 1.0 iterator as needed
    2016-04-09: added GAF 2.1 iterator & fixed bug in iterator assignment
    In the meantime GAF 2.1 uses the GAF 2.0 iterator
    !gaf-version: 2.0!gaf-version: 1.0!gaf-version: 2.1!gaf-version: 2.2Unknown GAF version r#   )rC   rD   ri   rf   rF   rG   s     r3   gafbyproteiniteratorrp   o  s     __F||~~,,,&v...	.	.	.&v...	.	.	. 'v...	.	.	.
 'v...::::;;;r5   c                    |                                  }|                                dk    rt          |           S |                                dk    rt          |           S |                                dk    rt          |           S |                                dk    rt          |           S t	          d| d          )a  Iterate over a GAF 1.0 or 2.x file.

    This function should be called to read a
    gene_association.goa_uniprot file. Reads the first record and
    returns a gaf 2.x or a gaf 1.0 iterator as needed

    Example: open, read, interat and filter results.

    Original data file has been trimmed to ~600 rows.

    Original source ftp://ftp.ebi.ac.uk/pub/databases/GO/goa/YEAST/goa_yeast.gaf.gz

    >>> from Bio.UniProt.GOA import gafiterator, record_has
    >>> Evidence = {'Evidence': set(['ND'])}
    >>> Synonym = {'Synonym': set(['YA19A_YEAST', 'YAL019W-A'])}
    >>> Taxon_ID = {'Taxon_ID': set(['taxon:559292'])}
    >>> with open('UniProt/goa_yeast.gaf', 'r') as handle:
    ...     for rec in gafiterator(handle):
    ...         if record_has(rec, Taxon_ID) and record_has(rec, Evidence) and record_has(rec, Synonym):
    ...             for key in ('DB_Object_Name', 'Evidence', 'Synonym', 'Taxon_ID'):
    ...                 print(rec[key])
    ...
    Putative uncharacterized protein YAL019W-A
    ND
    ['YA19A_YEAST', 'YAL019W-A']
    ['taxon:559292']
    Putative uncharacterized protein YAL019W-A
    ND
    ['YA19A_YEAST', 'YAL019W-A']
    ['taxon:559292']
    Putative uncharacterized protein YAL019W-A
    ND
    ['YA19A_YEAST', 'YAL019W-A']
    ['taxon:559292']

    rk   rm   rn   rl   ro   r#   )rC   rD   rX   r\   rF   rG   s     r3   gafiteratorrr     s    J __F||~~,,,f%%%	.	.	. f%%%	.	.	.
 f%%%	.	.	.f%%%::::;;;r5   c                 
   d}|dd         D ]L}t          | |         t                    r!| |         D ]
}||dz   z  }|dd         dz   }>|| |         dz   z  }M|| |d                  dz   z  }|                    |           dS )zWrite a single UniProt-GOA record to an output stream.

    Caller should know the  format version. Default: gaf-2.0
    If header has a value, then it is assumed this is the first record,
    a header is written.
     Nr'   r$   r#   )
isinstancelistwrite)outrecr0   fieldsoutstrfieldsubfields         r3   writerecr~     s     F + +fUmT** 	+"5M ) )(S.(CRC[4'FFfUmd**FF
fVBZ 4''F
LLr5   c                 4    | D ]}t          |||           dS )aO  Write a list of GAF records to an output stream.

    Caller should know the  format version. Default: gaf-2.0
    If header has a value, then it is assumed this is the first record,
    a header is written. Typically the list is the one read by fafbyproteinrec, which
    contains all consecutive lines with the same DB_Object_ID
    )rz   N)r~   )
outprotrecr0   rz   ry   s       r3   writebyproteinrecr     s5      0 0/////0 0r5   c                     d}|D ]K}t          | |         t                    r
| |         h}nt          | |                   }|||         z  rd} nL|S )zAccept a record, and a dictionary of field values.

    The format is {'field_name': set([val1, val2])}.
    If any field in the record has  a matching value, the function returns
    True. Otherwise, returns False.
    FT)rv   strset)r2   	fieldvalsretvalr|   set1s        r3   
record_hasr     st     F  eElC(( 	%%L>DDuU|$$D)E"" 	FE	 Mr5   __main__)run_doctest)verbose)__doc__r_   rW   r[   rL   rP   r.   r:   r@   r4   r;   rA   rH   rM   rQ   rS   rX   r\   rf   ri   rp   rr   r~   r   r   __name__
Bio._utilsr    r5   r3   <module>r      s   (   *  &        
 
 
  , , ,", , ,&, , ,&< < <2, , ,(, , ,(< < <$, , ,, , ,( ( (2( ( (2< < <>7< 7< 7<t %0    & 2= 	0 	0 	0 	0  & z&&&&&&K r5   