# Copyright 2000 by Jeffrey Chang.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Code to work with the prosite.doc file from Prosite.

See https://www.expasy.org/prosite/

Tested with:
 - Release 15.0, July 1998
 - Release 16.0, July 1999
 - Release 20.22, 13 November 2007
 - Release 20.43, 10 February 2009

Functions:
 - read               Read a Prodoc file containing exactly one Prodoc entry.
 - parse              Iterates over entries in a Prodoc file.

Classes:
 - Record             Holds Prodoc data.
 - Reference          Holds data from a Prodoc reference.

"""


def read(handle):
    """Return the single Prodoc record stored in handle.

    The record is parsed first; one further line is then requested from
    the handle, and if anything comes back the input is taken to hold
    more than one record and a ValueError is raised.
    """
    record = __read(handle)
    # Any remaining content means the file held more than one record
    if handle.readline():
        raise ValueError("More than one Prodoc record found")
    return record


def parse(handle):
    """Yield the Prodoc records found in handle, one at a time.

    Records are read lazily; iteration stops as soon as no further
    record can be read from the handle.
    """
    record = __read(handle)
    while record:
        yield record
        record = __read(handle)


class Record:
    """Container for the contents of one Prodoc entry.

    Attributes:
     - accession      Accession number of the entry (string).
     - prosite_refs   List of (prosite accession, prosite name) tuples.
     - text           Free format text of the entry (string).
     - references     List of Reference objects cited by the entry.

    """

    def __init__(self):
        """Create an empty record: blank strings and empty lists."""
        self.accession: str = ""
        self.prosite_refs: list = []
        self.text: str = ""
        self.references: list = []


class Reference:
    """Container for one citation listed in a Prodoc entry.

    Attributes:
     - number     Number of the reference, kept as a string.
     - authors    Names of the authors (string).
     - citation   Text describing the citation (string).

    """

    def __init__(self):
        """Create a reference whose fields are all empty strings."""
        self.number: str = ""
        self.authors: str = ""
        self.citation: str = ""


# Below are private functions


def __read_prosite_reference_line(record, line):
    """Parse a '{accession; name}' line and store it on the record (PRIVATE).

    Arguments:
     - record - object whose ``prosite_refs`` list receives the
       (accession, name) tuple.
     - line   - raw line; trailing whitespace is ignored.

    Raises ValueError if the line does not end with "}" or does not hold
    exactly two fields separated by "; ".
    """
    line = line.rstrip()
    # endswith() is also safe for a line that is empty after stripping
    if not line.endswith("}"):
        raise ValueError(f"I don't understand the Prosite reference on line\n{line}")
    # Drop the surrounding braces before splitting into the two fields
    fields = line[1:-1].split("; ")
    if len(fields) != 2:
        raise ValueError(f"I don't understand the Prosite reference on line\n{line}")
    acc, name = fields
    record.prosite_refs.append((acc, name))


def __read_text_line(record, line):
    """Append line, unchanged, to the record's text (PRIVATE).

    Always returns True.
    """
    record.text = record.text + line
    return True


def __read_reference_start(record, line):
    """Start a new reference from its first line (PRIVATE).

    Characters 1-2 of the line give the reference number; everything from
    character 4 onwards, stripped, is the payload. A new Reference holding
    both is appended to ``record.references``.
    """
    payload = line[4:].strip()
    entry = Reference()
    entry.number = line[1:3].strip()
    if line[1] == "E":
        # Electronic reference: the line carries the URL, not the authors
        entry.citation = payload
    else:
        entry.authors = payload
    record.references.append(entry)


def __read_reference_line(record, line):
    """Add a continuation line to the record's latest reference (PRIVATE).

    Arguments:
     - record - object whose last entry in ``references`` is extended.
     - line   - raw line from the file.

    Returns False for a blank line and True once a continuation line
    (one starting with five spaces) has been consumed. While the authors
    text ends with a comma the line is appended to the authors, otherwise
    it is appended to the citation.

    Raises ValueError if the line is neither blank nor a continuation line.
    """
    if not line.strip():
        return False
    if not line.startswith("     "):
        raise ValueError(f"I don't understand the reference line\n{line}")
    reference = record.references[-1]
    # Use endswith() rather than indexing: authors may be empty (it is for
    # electronic references, which only carry a citation).
    if reference.authors.endswith(","):
        # Keep one of the leading spaces as the separator after the comma
        reference.authors += line[4:].rstrip()
    else:
        reference.citation += line[5:]
    return True


def __read_copyright_line(record, line):
    """Consume one line of the copyright statement (PRIVATE).

    The line is discarded. Returns False when it starts with "+----" and
    True for any other line.
    """
    return not line.startswith("+----")


def __read(handle):
    """Read the next Prodoc record from handle (PRIVATE).

    Blank lines and lines starting with "//" before the record are
    skipped. Returns a Record, or None when no further non-blank line is
    available.

    Raises ValueError if the accession line, the '{BEGIN' line or the
    closing '{END}' line is missing or malformed. Exceptions raised by the
    line helpers propagate unchanged.
    """
    # Skip blank lines between records
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            break
    else:
        # Input exhausted without finding the start of a record
        return None
    record = Record()
    # Read the accession number
    if not line.startswith("{PDOC"):
        raise ValueError(f"Line does not start with '{{PDOC':\n{line}")
    if line[-1] != "}":
        raise ValueError(f"I don't understand accession line\n{line}")
    record.accession = line[1:-1]
    # Read the Prosite references
    for line in handle:
        if line.startswith("{PS"):
            __read_prosite_reference_line(record, line)
        else:
            break
    else:
        raise ValueError("Unexpected end of stream.")
    # Read the actual text
    if not line.startswith("{BEGIN"):
        raise ValueError(f"Line does not start with '{{BEGIN':\n{line}")
    # read_line is the handler for ordinary lines of the current section;
    # None means lines are ignored until the next section marker.
    read_line = __read_text_line
    for line in handle:
        if line.startswith("{END}"):
            # Clean up the record and return
            for reference in record.references:
                reference.citation = reference.citation.rstrip()
                reference.authors = reference.authors.rstrip()
            return record
        elif line[:1] == "[" and line[3:5] == "] ":
            # Start of a reference such as "[ 1] " or "[E1] ". Slices are
            # used so that a short line cannot raise IndexError.
            __read_reference_start(record, line)
            read_line = __read_reference_line
        elif line.startswith("+----"):
            read_line = __read_copyright_line
        elif read_line:
            # A false return value means the current section has ended
            if not read_line(record, line):
                read_line = None
    raise ValueError("Unexpected end of stream.")
