# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.
"""Bio.SearchIO object to model a single database hit."""

from itertools import chain

from Bio.SearchIO._utils import allitems
from Bio.SearchIO._utils import getattr_str
from Bio.SearchIO._utils import optionalcascade

from ._base import _BaseSearchObject
from .hsp import HSP


class Hit(_BaseSearchObject):
    """Class representing a single database hit of a search result.

    Hit objects are the second-level container in the SearchIO module. They
    are the objects contained within a QueryResult (see QueryResult). They
    themselves are containers for HSP objects and will contain at least one
    HSP.

    To have a quick look at a Hit and its contents, invoke ``print`` on it::

        >>> from Bio import SearchIO
        >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
        >>> hit = qresult[3]
        >>> print(hit)
        Query: 33211
               mir_1
          Hit: gi|301171322|ref|NR_035857.1| (86)
               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                  #   E-value  Bit score    Span      Query range              Hit range
               ----  --------  ---------  ------  ---------------  ---------------------
                  0   8.9e-20     100.47      60           [1:61]                [13:73]
                  1   3.3e-06      55.39      60           [0:60]                [13:73]

    You can invoke ``len`` on a Hit object to see how many HSP objects it contains::

        >>> len(hit)
        2

    Hit objects behave very similarly to Python lists. You can retrieve the HSP
    object inside a Hit using the HSP's integer index. Hit objects can also be
    sliced, which will return a new Hit object containing only the sliced HSPs::

        # HSP items inside the Hit can be retrieved using its integer index
        >>> hit[0]
        HSP(hit_id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 fragments)

        # slicing returns a new Hit
        >>> hit
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 2 hsps)
        >>> hit[:1]
        Hit(id='gi|301171322|ref|NR_035857.1|', query_id='33211', 1 hsps)
        >>> print(hit[1:])
        Query: 33211
               mir_1
          Hit: gi|301171322|ref|NR_035857.1| (86)
               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                  #   E-value  Bit score    Span      Query range              Hit range
               ----  --------  ---------  ------  ---------------  ---------------------
                  0   3.3e-06      55.39      60           [0:60]                [13:73]

    Hit objects provide ``filter`` and ``map`` methods, which are analogous to
    Python's built-in ``filter`` and ``map`` except that they return a new Hit
    object instead of a list.

    Here is an example of using ``filter`` to select for HSPs whose e-value is
    less than 1e-10::

        >>> evalue_filter = lambda hsp: hsp.evalue < 1e-10
        >>> filtered_hit = hit.filter(evalue_filter)
        >>> len(hit)
        2
        >>> len(filtered_hit)
        1
        >>> print(filtered_hit)
        Query: 33211
               mir_1
          Hit: gi|301171322|ref|NR_035857.1| (86)
               Pan troglodytes microRNA mir-520c (MIR520C), microRNA
         HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                  #   E-value  Bit score    Span      Query range              Hit range
               ----  --------  ---------  ------  ---------------  ---------------------
                  0   8.9e-20     100.47      60           [1:61]                [13:73]

    There are also other methods which are counterparts of Python lists' methods
    with the same names: ``append``, ``index``, ``pop``, and ``sort``. Consult their
    respective documentations for more details and examples of their usage.

    """

    # attributes we don't want to transfer when creating a new Hit class
    # from this one
    _NON_STICKY_ATTRS = ("_items",)

    def __init__(self, hsps=(), id=None, query_id=None):
        """Initialize a Hit object.

        :param hsps: HSP objects contained in the Hit object
        :type hsps: iterable yielding HSP
        :param id: hit ID
        :type id: string
        :param query_id: query ID
        :type query_id: string
        :raises ValueError: if the given HSPs do not all share the same
            ``query_id``, ``query_description``, ``hit_id``, and
            ``hit_description`` values (validation performed while
            appending may raise it as well)
        :raises TypeError: if an element is not an HSP object; note that an
            object lacking the four attributes above fails earlier with
            ``AttributeError``

        If multiple HSP objects are used for initialization, they must all
        have the same ``query_id``, ``query_description``, ``hit_id``, and
        ``hit_description`` properties.

        ``hsps`` may be any iterable, including a single-use iterator such
        as a generator; it is consumed exactly once.
        """
        # default attribute values
        self._id = id
        self._id_alt = []
        self._query_id = query_id
        self._description = None
        self._description_alt = []
        self._query_description = None
        self.attributes = {}
        self.dbxrefs = []

        # Materialize the input once. It is walked several times below (once
        # per checked attribute, then once more to store the HSPs), which
        # would exhaust a single-use iterator after the first pass and
        # silently produce an empty Hit.
        hsps = list(hsps)

        for attr in ("query_id", "query_description", "hit_id", "hit_description"):
            # HACK: setting the if clause to '> 1' allows for empty hit objects.
            # This makes it easier to work with file formats with unpredictable
            # hit-hsp ordering. The empty hit object itself is nonfunctional,
            # however, since all its cascading properties are empty.
            if len({getattr(hsp, attr) for hsp in hsps}) > 1:
                raise ValueError(
                    "Hit object can not contain HSPs with more than one %s." % attr
                )

        self._items = []
        for hsp in hsps:
            # append() validates each HSP before storing it, so no separate
            # validation call is needed here
            self.append(hsp)

    def __repr__(self):
        """Return a one-line representation: hit ID, query ID and HSP count."""
        text = f"Hit(id={self.id!r}, query_id={self.query_id!r}, {len(self)!r} hsps)"
        return text

    def __iter__(self):
        """Return an iterator over the HSP objects of this Hit."""
        contained = self.hsps
        return iter(contained)

    def __len__(self):
        """Return how many HSP objects this Hit holds."""
        contained = self.hsps
        return len(contained)

    def __bool__(self):
        """Return True if the Hit holds at least one HSP, False otherwise."""
        return True if self.hsps else False

    def __contains__(self, hsp):
        """Return True if the given HSP is one of the stored items."""
        stored = self._items
        return hsp in stored

    def __str__(self):
        """Return a human readable summary of the Hit object.

        The summary lists the query ID and description, the hit ID (with the
        sequence length when the Hit has a ``seq_len`` attribute) and
        description, any extra attributes and database cross-references, and
        finally a table with one row per HSP.
        """

        def clip(text):
            # description lines wider than 80 characters are cut to 77
            # characters and marked with an ellipsis
            return text[:77] + "..." if len(text) > 80 else text

        def squeeze(span, width):
            # coordinate ranges wider than their column are cut and closed
            # with '~]' so that they occupy exactly ``width`` characters
            return span[: width - 2] + "~]" if len(span) > width else span

        out = ["Query: %s" % self.query_id]
        if self.query_description:
            out.append(clip("       %s" % self.query_description))

        # hit ID line; the sequence length is optional and only shown when set
        hit_header = "  Hit: %s" % self.id
        missing = object()
        seq_len = getattr(self, "seq_len", missing)
        if seq_len is not missing:
            hit_header += " (%i)" % seq_len
        out.append(hit_header)
        if self.description:
            out.append(clip("       %s" % self.description))

        # one line per extra attribute, ordered by key
        out.extend(f" {key}: {value}" for key, value in sorted(self.attributes.items()))

        if self.dbxrefs:
            out.append("Database cross-references: " + ", ".join(self.dbxrefs))

        # a Hit without HSPs gets a placeholder instead of a table
        if not self.hsps:
            out.append(" HSPs: ?")
            return "\n".join(out)

        rules = ("-" * 4, "-" * 8, "-" * 9, "-" * 6, "-" * 15, "-" * 21)
        row = "%11s  %8s  %9s  %6s  %15s  %21s"
        out.append(" HSPs: %s  %s  %s  %s  %s  %s" % rules)
        out.append(
            row % ("#", "E-value", "Bit score", "Span", "Query range", "Hit range")
        )
        out.append(row % rules)

        for idx, hsp in enumerate(self.hsps):
            evalue = getattr_str(hsp, "evalue", fmt="%.2g")
            bitscore = getattr_str(hsp, "bitscore", fmt="%.2f")
            aln_span = getattr_str(hsp, "aln_span")
            # query coordinates, at most 15 characters wide
            query_start = getattr_str(hsp, "query_start")
            query_end = getattr_str(hsp, "query_end")
            query_range = squeeze(f"[{query_start}:{query_end}]", 15)
            # hit coordinates, at most 21 characters wide
            hit_start = getattr_str(hsp, "hit_start")
            hit_end = getattr_str(hsp, "hit_end")
            hit_range = squeeze(f"[{hit_start}:{hit_end}]", 21)
            out.append(row % (idx, evalue, bitscore, aln_span, query_range, hit_range))

        return "\n".join(out)

    def __getitem__(self, idx):
        """Return the HSP at an integer index, or a new Hit for a slice.

        :param idx: position of the wanted HSP, or a slice of positions
        :type idx: int or slice

        For a slice, a new object of the same class is built from the
        selected HSPs and ``_transfer_attrs`` is called to carry this Hit's
        attributes over to it.
        """
        if not isinstance(idx, slice):
            return self._items[idx]
        sliced = self.__class__(self.hsps[idx])
        self._transfer_attrs(sliced)
        return sliced

    def __setitem__(self, idx, hsps):
        """Assign one HSP to an integer index, or several HSPs to a slice.

        :param idx: position(s) to overwrite
        :type idx: int or slice
        :param hsps: a single HSP when ``idx`` is an integer, or a list or
            tuple of HSPs when ``idx`` is a slice
        :type hsps: HSP, or list/tuple of HSP
        :raises TypeError: if any assigned object is not an HSP, if a slice
            is assigned something other than a list or tuple, or if an
            integer index is assigned a list or tuple
        :raises ValueError: if an HSP's IDs or descriptions do not match
            those of this Hit
        """
        if isinstance(idx, slice):
            # HSP objects are themselves iterable (over their fragments), so
            # assigning a bare HSP to a slice would splice its fragments into
            # the item list; only real sequences of HSPs are accepted here.
            if not isinstance(hsps, (list, tuple)):
                raise TypeError(
                    "Slice assignment to a Hit requires a list or tuple of "
                    "HSP objects."
                )
            for hsp in hsps:
                self._validate_hsp(hsp)
        else:
            # a single position holds a single HSP; a list or tuple fails the
            # type check inside _validate_hsp instead of being stored as-is
            self._validate_hsp(hsps)

        self._items[idx] = hsps

    def __delitem__(self, idx):
        """Remove the HSP(s) at the given integer index or slice."""
        stored = self._items
        del stored[idx]

    # hsp properties #
    def _validate_hsp(self, hsp):
        """Check that an HSP object may be stored in this Hit (PRIVATE).

        :param hsp: object to check
        :type hsp: HSP
        :raises TypeError: if ``hsp`` is not an HSP instance
        :raises ValueError: if the Hit already holds items and the HSP's hit
            ID, hit description, query ID or query description differs from
            the corresponding non-None value of the Hit

        When the Hit's own value for one of these four properties is None,
        it is set from the HSP instead of being compared.
        """
        if not isinstance(hsp, HSP):
            raise TypeError("Hit objects can only contain HSP objects.")
        # HACK: to make validation during __init__ work, a Hit without any
        # items accepts every HSP without further checks
        if not self._items:
            return

        # (Hit property, HSP attribute, error message), checked in this order
        checks = (
            ("id", "hit_id", "Expected HSP with hit ID %r, found %r instead."),
            (
                "description",
                "hit_description",
                "Expected HSP with hit description %r, found %r instead.",
            ),
            (
                "query_id",
                "query_id",
                "Expected HSP with query ID %r, found %r instead.",
            ),
            (
                "query_description",
                "query_description",
                "Expected HSP with query description %r, found %r instead.",
            ),
        )
        for own_attr, hsp_attr, message in checks:
            expected = getattr(self, own_attr)
            found = getattr(hsp, hsp_attr)
            if expected is None:
                # nothing to compare against yet: adopt the HSP's value
                setattr(self, own_attr, found)
            elif found != expected:
                raise ValueError(message % (expected, found))

    # properties #
    # Each optionalcascade(...) call below names a private attribute that is
    # initialized in __init__, the corresponding attribute name on HSP objects,
    # and the docstring of the resulting class attribute.
    # NOTE(review): presumably the result is a property that stores its value in
    # the private attribute and cascades assignments to the contained HSPs --
    # confirm against Bio.SearchIO._utils.optionalcascade.
    description = optionalcascade(
        "_description", "hit_description", """Hit description"""
    )
    query_description = optionalcascade(
        "_query_description",
        "query_description",
        """Description of the query that produced the hit""",
    )
    id = optionalcascade("_id", "hit_id", """Hit ID string.""")
    query_id = optionalcascade(
        "_query_id", "query_id", """ID string of the query that produced the hit"""
    )
    # returns all hsps
    # NOTE(review): presumably exposes the list kept in ``_items``; confirm
    # against Bio.SearchIO._utils.allitems.
    hsps = allitems(doc="""HSP objects contained in the Hit""")

    @property
    def id_all(self):
        """All IDs of the Hit: the primary ID followed by the alternative ones."""
        primary = [self.id]
        return primary + self._id_alt

    @property
    def description_all(self):
        """All descriptions of the Hit: the primary one, then the alternatives."""
        primary = [self.description]
        return primary + self._description_alt

    @property
    def fragments(self):
        """List of the HSPFragment objects of every HSP in the Hit, in order."""
        # iterating over each stored HSP yields its fragments
        every_fragment = chain.from_iterable(self._items)
        return list(every_fragment)

    # public methods #
    def append(self, hsp):
        """Add an HSP object to the end of the Hit.

        :param hsp: object to append
        :type hsp: HSP
        :raises TypeError: if ``hsp`` is not an HSP object
        :raises ValueError: if ``hsp`` does not match the IDs or descriptions
            of a Hit that already holds items

        Any HSP object appended must have the same ``hit_id`` property as the
        Hit object's ``id`` property and the same ``query_id`` property as the
        Hit object's ``query_id`` property.

        """
        # validation comes first so that a rejected HSP is never stored
        self._validate_hsp(hsp)
        stored = self._items
        stored.append(hsp)

    def filter(self, func=None):
        """Create new Hit object whose HSP objects pass the filter function.

        :param func: function for filtering; with None, HSPs are kept when
            they are truthy, as with the built-in ``filter``
        :type func: callable, accepts HSP, returns bool
        :returns: a new Hit holding the HSPs that passed, or None when no
            HSP passed

        ``filter`` is analogous to Python's built-in ``filter`` function, except
        that instead of returning a list it returns a ``Hit`` object. Here is an
        example of using ``filter`` to select for HSPs having bitscores bigger
        than 60::

            >>> from Bio import SearchIO
            >>> qresult = next(SearchIO.parse('Blast/mirna.xml', 'blast-xml'))
            >>> hit = qresult[3]
            >>> bitscore_filter = lambda hsp: hsp.bitscore > 60
            >>> filtered_hit = hit.filter(bitscore_filter)
            >>> len(hit)
            2
            >>> len(filtered_hit)
            1
            >>> print(filtered_hit)
            Query: 33211
                   mir_1
              Hit: gi|301171322|ref|NR_035857.1| (86)
                   Pan troglodytes microRNA mir-520c (MIR520C), microRNA
             HSPs: ----  --------  ---------  ------  ---------------  ---------------------
                      #   E-value  Bit score    Span      Query range              Hit range
                   ----  --------  ---------  ------  ---------------  ---------------------
                      0   8.9e-20     100.47      60           [1:61]                [13:73]

        """
        kept = [*filter(func, self.hsps)]
        if not kept:
            # nothing passed: no Hit is built
            return None
        filtered = self.__class__(kept)
        self._transfer_attrs(filtered)
        return filtered

    def index(self, hsp):
        """Return the zero-based position of the given HSP object in the Hit.

        :param hsp: object to look up
        :type hsp: HSP

        """
        position = self._items.index(hsp)
        return position

    def map(self, func=None):
        """Create new Hit object, mapping the given function to its HSPs.

        :param func: function for mapping; with None, the HSPs are carried
            over unchanged
        :type func: callable, accepts HSP, returns HSP
        :returns: a new Hit holding the mapped HSPs, or None when this Hit
            has no HSPs

        ``map`` is analogous to Python's built-in ``map`` function. It is applied to
        all HSPs contained in the Hit object and returns a new Hit object.

        """
        # work on a shallow copy of the HSP list
        source = self.hsps[:]
        mapped = source if func is None else [func(hsp) for hsp in source]
        if not mapped:
            return None
        result = self.__class__(mapped)
        self._transfer_attrs(result)
        return result

    def pop(self, index=-1):
        """Remove and return the HSP object at the specified index.

        :param index: index of HSP object to pop; defaults to the last one
        :type index: int

        """
        removed = self._items.pop(index)
        return removed

    def sort(self, key=None, reverse=False, in_place=True):
        """Sort the HSP objects.

        :param key: sorting function
        :type key: callable, accepts HSP, returns key for sorting
        :param reverse: whether to reverse sorting results or not
        :type reverse: bool
        :param in_place: whether to do in-place sorting or not
        :type in_place: bool
        :returns: None when sorting in place, otherwise a new sorted Hit

        ``sort`` defaults to sorting in-place, to mimic Python's ``list.sort``
        method. If you set the ``in_place`` argument to False, it will
        return a new, sorted Hit object and keep the initial one unsorted.

        """
        if not in_place:
            # sort a copy and wrap it in a new object, leaving this Hit as is
            ordered = sorted(self.hsps, key=key, reverse=reverse)
            sorted_hit = self.__class__(ordered)
            self._transfer_attrs(sorted_hit)
            return sorted_hit
        self._items.sort(key=key, reverse=reverse)


# If this file is executed directly rather than imported as a module, run the
# doctests. The helper is imported inside the guard, so it is only loaded in
# that case.
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()
