
    Xf                     h    d Z d Zd Z G d de          Zd Zedk    rddlmZ  e             d	S d	S )
aX  Parser for the cellosaurus.txt file from ExPASy.

See https://web.expasy.org/cellosaurus/

Tested with the release of Version 18 (July 2016).

Functions:
 - read       Reads a file containing one cell line entry
 - parse      Reads a file containing multiple cell line entries

Classes:
 - Record     Holds cell line data.

Examples
--------
This example downloads the Cellosaurus database and parses it. Note that
urlopen returns a stream of bytes, while the parser expects a stream of plain
string, so we use TextIOWrapper to convert bytes to string using the UTF-8
encoding. This is not needed if you download the cellosaurus.txt file in
advance and open it (see the comment below).

    >>> from urllib.request import urlopen
    >>> from io import TextIOWrapper
    >>> from Bio.ExPASy import cellosaurus
    >>> url = "ftp://ftp.expasy.org/databases/cellosaurus/cellosaurus.txt"
    >>> bytestream = urlopen(url)
    >>> textstream = TextIOWrapper(bytestream, "UTF-8")
    >>> # alternatively, use
    >>> # textstream = open("cellosaurus.txt")
    >>> # if you downloaded the cellosaurus.txt file in advance.
    >>> records = cellosaurus.parse(textstream)
    >>> for record in records:
    ...     if 'Homo sapiens' in record['OX'][0]:
    ...         print(record['ID'])  # doctest:+ELLIPSIS
    ...
    #15310-LN
    #W7079
    (L)PC6
    0.5alpha
    ...

c              #   8   K   	 t          |           }|sdS |V  )zParse cell line records.

    This function is for parsing cell line files containing multiple
    records.

    Arguments:
     - handle   - handle to the file.

    TN)__read)handlerecords     6lib/python3.11/site-packages/Bio/ExPASy/cellosaurus.pyparser   2   s1       	E	    c                 n    t          |           }|                                 }|rt          d          |S )zRead one cell line record.

    This function is for parsing cell line files containing
    exactly one record.

    Arguments:
     - handle   - handle to the file.

    z$More than one cell line record found)r   read
ValueError)r   r   	remainders      r   r
   r
   C   s9     F^^FI A?@@@Mr   c                   $    e Zd ZdZd Zd Zd ZdS )Recorda$  Holds information from an ExPASy Cellosaurus record as a Python dictionary.

    Each record contains the following keys:

    =========  ==============================  =======================
    Line code  Content                         Occurrence in an entry
    =========  ==============================  =======================
    ID         Identifier (cell line name)     Once; starts an entry
    AC         Accession (CVCL_xxxx)           Once
    AS         Secondary accession number(s)   Optional; once
    SY         Synonyms                        Optional; once
    DR         Cross-references                Optional; once or more
    RX         References identifiers          Optional: once or more
    WW         Web pages                       Optional; once or more
    CC         Comments                        Optional; once or more
    ST         STR profile data                Optional; twice or more
    DI         Diseases                        Optional; once or more
    OX         Species of origin               Once or more
    HI         Hierarchy                       Optional; once or more
    OI         Originate from same individual  Optional; once or more
    SX         Sex of cell                     Optional; once
    AG         Age of donor at sampling        Optional; once
    CA         Category                        Once
    DT         Date (entry history)            Once
    //         Terminator                      Once; ends an entry
    =========  ==============================  =======================

    c                     t                               |            d| d<   d| d<   d| d<   d| d<   g | d<   g | d<   g | d<   g | d	<   g | d
<   g | d<   g | d<   g | d<   g | d<   d| d<   d| d<   d| d<   d| d<   dS )zInitialize the class. IDACASSYDRRXWWCCSTDIOXHIOISXAGCADTN)dict__init__selfs    r   r#   zRecord.__init__s   s    dT
T
T
T
T
T
T
T
T
T
T
T
T
T
T
T
T


r   c                     | d         rA| d         r!| j         j         d| d          d| d          dS | j         j         d| d          dS | j         j         dS )z@Return the canonical string representation of the Record object.r   r   z (z, )z ( ))	__class____name__r$   s    r   __repr__zRecord.__repr__   s{    : 	4Dz C.1PPT$ZPP4:PPPP.1BBT$ZBBBBn-3333r   c                    d| d         z   }|d| d         z   z  }|d| d         z   z  }|d| d         z   z  }|d	t          | d
                   z   z  }|dt          | d                   z   z  }|dt          | d                   z   z  }|dt          | d                   z   z  }|dt          | d                   z   z  }|dt          | d                   z   z  }|dt          | d                   z   z  }|dt          | d                   z   z  }|dt          | d                   z   z  }|d| d         z   z  }|d| d         z   z  }|d| d          z   z  }|d!| d"         z   z  }|S )#z=Return a readable string representation of the Record object.zID: r   z AC: r   z AS: r   z SY: r   z DR: r   z RX: r   z WW: r   z CC: r   z ST: r   z DI: r   z OX: r   z HI: r   z OI: r   z SX: r   z AG: r   z CA: r    z DT: r!   )repr)r%   outputs     r   __str__zRecord.__str__   s   $t*$'DJ&&'DJ&&'DJ&&'Dd,,,,'Dd,,,,'Dd,,,,'Dd,,,,'Dd,,,,'Dd,,,,'Dd,,,,'Dd,,,,'Dd,,,,'DJ&&'DJ&&'DJ&&'DJ&&r   N)r)   
__module____qualname____doc__r#   r*   r.    r   r   r   r   U   sK         :  *4 4 4    r   r   c                    d }| D ]}|d d         |dd                                           }}|dk    rt                      }||d<   B|dv r||xx         |z  cc<   W|dv r||                             |           w|dk    rZ|                    d          \  }}|d                             |                                |                                f           |dk    r|r|c S |rt          d	          d S )
N      r   )r   r   r   r   r   r    r!   )r   r   r   r   r   r   r   r   r   ;z//zUnexpected end of stream)rstripr   appendsplitstripr   )r   r   linekeyvaluekvs          r   r   r      s<   F  "1"XtABBx00U$;;XXF F4LL>>>3KKK5 KKKK 

 

 

 3Ku%%%%D[[;;s##DAq4LAGGII 67777D[[ 	 
  534445 5r   __main__    )run_doctestN)	r1   r   r
   r"   r   r   r)   
Bio._utilsrB   r2   r   r   <module>rD      s   ) )X  "  $P P P P PT P P Pl5 5 5D z&&&&&&KMMMMM r   