# Copyright 2005 by Jonathan Taylor.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Cleaved amplified polymorphic sequence (CAPS) markers.

A CAPS marker is a location a DifferentialCutsite as described below and a
set of primers that can be used to visualize this.  More information can
be found in the paper `Konieczny and Ausubel (1993)`_ (PMID 8106085).

.. _`Konieczny and Ausubel (1993)`: https://doi.org/10.1046/j.1365-313X.1993.04020403.x

"""

from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq


class DifferentialCutsite:
    """Differential enzyme cutsite in an alignment.

    A differential cutsite is a location in an alignment where an enzyme cuts
    at least one sequence and also cannot cut at least one other sequence.

    Members:
     - start - Where it lives in the alignment.
     - enzyme - The enzyme that causes this.
     - cuts_in - A list of sequences (as indexes into the alignment) the
       enzyme cuts in.
     - blocked_in - A list of sequences (as indexes into the alignment) the
       enzyme is blocked in.

    """

    def __init__(self, **kwds):
        """Initialize a DifferentialCutsite.

        Each member (as listed in the class description) should be included as a
        keyword.
        """
        self.start = int(kwds["start"])
        self.enzyme = kwds["enzyme"]
        self.cuts_in = kwds["cuts_in"]
        self.blocked_in = kwds["blocked_in"]


class AlignmentHasDifferentLengthsError(Exception):
    """Exception where sequences in alignment have different lengths."""


class CAPSMap:
    """A map of an alignment showing all possible dcuts.

    Members:
     - alignment - The alignment that is mapped.
     - dcuts - A list of possible CAPS markers in the form of
       DifferentialCutsites.

    """

    def __init__(self, alignment, enzymes=None):
        """Initialize the CAPSMap.

        Required:
         - alignment - The alignment to be mapped.

        Optional:
         - enzymes - List of enzymes to be used to create the map.
           Defaults to an empty list.

        """
        if enzymes is None:
            enzymes = []
        if isinstance(alignment, MultipleSeqAlignment):
            self.sequences = [rec.seq for rec in alignment]
            self.size = len(self.sequences)
            self.length = len(self.sequences[0])
            for seq in self.sequences:
                if len(seq) != self.length:
                    raise AlignmentHasDifferentLengthsError
        else:  # Alignment object
            self.sequences = [Seq(s) for s in alignment]
            self.size, self.length = alignment.shape

        self.alignment = alignment
        self.enzymes = enzymes

        # look for dcuts
        self._digest()

    def _digest_with(self, enzyme):
        cuts = []  # list of lists, one per sequence
        all_seq_cuts = []

        # go through each sequence
        for seq in self.sequences:
            # grab all the cuts in the sequence
            seq_cuts = [cut - enzyme.fst5 for cut in enzyme.search(seq)]
            # maintain a list of all cuts in all sequences
            all_seq_cuts.extend(seq_cuts)
            cuts.append(seq_cuts)

        # we sort the all list and remove duplicates
        all_seq_cuts = sorted(set(all_seq_cuts))

        for cut in all_seq_cuts:
            # test for dcuts

            cuts_in = []
            blocked_in = []

            for i, seq in enumerate(self.sequences):
                if cut in cuts[i]:
                    cuts_in.append(i)
                else:
                    blocked_in.append(i)

            if cuts_in and blocked_in:
                self.dcuts.append(
                    DifferentialCutsite(
                        start=cut, enzyme=enzyme, cuts_in=cuts_in, blocked_in=blocked_in
                    )
                )

    def _digest(self):
        self.dcuts = []

        for enzyme in self.enzymes:
            self._digest_with(enzyme)
