#!/usr/bin/env python
#
#      Restriction Analysis Libraries.
#      Copyright (C) 2004. Frederic Sohm.
#
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#
r"""Print the results of restriction enzyme analysis.

PrintFormat prints the results from restriction analysis in 3 different
formats: list, number or map.

The easiest way to use it is:

    >>> from Bio.Restriction.PrintFormat import PrintFormat
    >>> from Bio.Restriction.Restriction import RestrictionBatch
    >>> from Bio.Seq import Seq
    >>> pBs_mcs = Seq('GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC')
    >>> restriction_batch = RestrictionBatch(['EcoRI', 'BamHI', 'ApaI'])
    >>> result = restriction_batch.search(pBs_mcs)
    >>> my_map = PrintFormat()
    >>> my_map.print_that(result, 'My pBluescript mcs analysis:\n',
    ...               'No site:\n')
    My pBluescript mcs analysis:
    ApaI       :  12.
    EcoRI      :  50.
    No site:
    BamHI     
    <BLANKLINE>
    >>> my_map.sequence = pBs_mcs
    >>> my_map.print_as("map")
    >>> my_map.print_that(result)
               12 ApaI
               |                                                
               |                                     50 EcoRI
               |                                     |          
    GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC
    ||||||||||||||||||||||||||||||||||||||||||||||||||||||
    CCATGGCCCGGGGGGGAGCTCCAGCTGCCATAGCTATTCGAACTATAGCTTAAG
    1                                                   54
    <BLANKLINE>
    <BLANKLINE>
       Enzymes which do not cut the sequence.
    <BLANKLINE>
    BamHI     
    <BLANKLINE>
    >>>

Some of the methods of PrintFormat are meant to be overridden by derived
classes.

Use the following parameters to control the appearance:

- ConsoleWidth : width of the console used default to 80.
                 should never be less than 60.
- NameWidth    : space attributed to the name in PrintList method.
- Indent       : Indent of the second line.
- MaxSize      : Maximal size of the sequence (default=6):
                 -> 99 999 bp + 1 trailing ','
                 people are unlikely to ask for restriction map of sequences
                 bigger than 100.000 bp. This is needed to determine the
                 space to be reserved for sites location.

                 - MaxSize = 5  =>   9.999 bp
                 - MaxSize = 6  =>  99.999 bp
                 - MaxSize = 7  => 999.999 bp

Example output::

    <------------ ConsoleWidth --------------->
    <- NameWidth ->
    EcoRI         :   1, 45, 50, 300, 400, 650,
                          700, 1200, 2500.
                      <-->
                        Indent

"""  # noqa: W291


import re


class PrintFormat:
    """PrintFormat allows the printing of results of restriction analysis."""

    # Width of the console, in characters.
    ConsoleWidth = 80
    # Column width to which each enzyme name is left-justified.
    NameWidth = 10
    # Characters reserved for one site position; subtracted from linesize
    # when wrapping long lists of positions.
    MaxSize = 6
    # NOTE: the derived values below (Cmodulo, PrefWidth, linesize) are
    # computed once, when the class body runs; changing ConsoleWidth or
    # NameWidth afterwards does not update them.
    # Remainder left when the console is divided into name-wide columns.
    Cmodulo = ConsoleWidth % NameWidth
    # ConsoleWidth rounded down to a multiple of NameWidth.
    PrefWidth = ConsoleWidth - Cmodulo
    # Extra indentation (added to NameWidth) of continuation lines in lists.
    Indent = 4
    # Preferred width minus one name column; used as the wrapping threshold.
    linesize = PrefWidth - NameWidth

    def print_as(self, what="list"):
        """Select the layout used by subsequent formatting calls.

        Recognised values of ``what``:
            'map'       -> a map representation of the sequence with the sites.
            'number'    -> enzymes grouped by number of sites in the sequence.
            'list'      -> enzymes in alphabetical order.

        Any other value selects the 'list' layout.

        The chosen formatter is stored on the instance as make_format.
        If you want more flexibility, point make_format at your own method.
        """
        if what == "map":
            formatter_name = "_make_map"
        elif what == "number":
            formatter_name = "_make_number"
        else:
            formatter_name = "_make_list"
        self.make_format = getattr(self, formatter_name)

    def format_output(self, dct, title="", s1=""):
        """Summarise results as a nicely formatted string.

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search().
           If it is empty or None, the ``results`` attribute of the instance
           is used instead when it exists; otherwise the analysis is treated
           as empty.
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        Returns the string built by make_format, i.e. a list layout unless
        another layout was selected (see print_as).
        """
        if not dct:
            # A plain PrintFormat has no ``results`` attribute; fall back to
            # an empty mapping rather than raising AttributeError.
            dct = getattr(self, "results", None) or {}
        # Split the enzymes into those with sites and those without.
        ls = [(enzyme, sites) for enzyme, sites in dct.items() if sites]
        nc = [enzyme for enzyme, sites in dct.items() if not sites]
        return self.make_format(ls, title, nc, s1)

    def print_that(self, dct, title="", s1=""):
        """Print the output of the format_output method (OBSOLETE).

        Arguments:
         - dct is a dictionary as returned by a RestrictionBatch.search()
         - title is the title of the map.
           It must be a formatted string, i.e. you must include the line break.
         - s1 is the title separating the list of enzymes that have sites from
           those without sites.
         - s1 must be a formatted string as well.

        This method prints the output of A.format_output() and it is here
        for backwards compatibility.
        """
        print(self.format_output(dct, title, s1))

    def make_format(self, cut=(), title="", nc=(), s1=""):
        """Format results; default implementation of the virtual method.

        Virtual method.
        By default it delegates to _make_list.  print_as replaces it on the
        instance with one of the other _make_* methods, and you can as well
        write your own method and point make_format to it.

        Arguments:
         - cut is a tuple or list of (enzyme, positions) results.
         - title is the title.
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_list(ls=cut, title=title, nc=nc, s1=s1)

    # _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """Summarise a list of positions by enzyme (PRIVATE).

        The result is the listing of the cutting enzymes::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.

        followed by the block describing the non cutting enzymes.

        Arguments:
         - ls is a tuple or list of cutting enzymes.
         - title is the title.
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        cutters = self._make_list_only(ls, title)
        non_cutters = self._make_nocut_only(nc, s1)
        return cutters + non_cutters

    def _make_map(self, ls, title, nc, s1):
        """Summarise mapping information as a string (PRIVATE).

        The result is the map of the cutting enzymes::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        followed by the block describing the non cutting enzymes.

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        drawing = self._make_map_only(ls, title)
        non_cutters = self._make_nocut_only(nc, s1)
        return drawing + non_cutters

    def _make_number(self, ls, title, nc, s1):
        """Format cutting position information as a string (PRIVATE).

        The result groups the cutting enzymes by their number of sites::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        followed by the block describing the non cutting enzymes.

        Arguments:
         - ls is a list of cutting enzymes.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        grouped = self._make_number_only(ls, title)
        non_cutters = self._make_nocut_only(nc, s1)
        return grouped + non_cutters

    def _make_nocut(self, ls, title, nc, s1):
        """Summarise non-cutting enzymes (PRIVATE).

        Return the title followed by a formatted string of the non cutting
        enzymes.

        ls is a list of cutting enzymes -> it is ignored.
        It is accepted only so the signature matches make_format.

        Arguments:
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        non_cutters = self._make_nocut_only(nc, s1)
        return title + non_cutters

    def _make_nocut_only(self, nc, s1, ls=(), title=""):
        """Summarise non-cutting enzymes (PRIVATE).

        Return a formatted string of the non cutting enzymes: the heading,
        then the sorted enzyme names, each left-justified to NameWidth.  A
        row is closed as soon as it grows wider than linesize.

        Arguments:
         - nc is a tuple or list of non cutting enzymes.
           If it is empty, s1 is returned unchanged.
         - s1 is the sentence before the non cutting enzymes.
           If it is empty, a default heading is used.
         - ls and title are not used.
        """
        if not nc:
            return s1
        pieces = [s1 or "\n   Enzymes which do not cut the sequence.\n\n"]
        row = ""
        for enzyme in sorted(nc):
            row += str(enzyme).ljust(self.NameWidth)
            if len(row) > self.linesize:
                # Row is full: flush it and start a new one.
                pieces.append(row + "\n")
                row = ""
        # The trailing (possibly empty) row always gets its own line break.
        pieces.append(row + "\n")
        return "".join(pieces)

    def _make_list_only(self, ls, title, nc=(), s1=""):
        """Summarise list of positions per enzyme (PRIVATE).

        Return the title followed by one line per enzyme::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.
            ...

        Arguments:
         - ls is a tuple or list of results.
           If it is empty, the title alone is returned.
         - title is a string.
         - nc and s1 are not used: non cutting enzymes are not included.
        """
        return self.__next_section(ls, title) if ls else title

    def _make_number_only(self, ls, title, nc=(), s1=""):
        """Summarise number of cuts as a string (PRIVATE).

        Return a string of form::

            title.

            enzymes which cut 1 times :

            enzyme1     :   position1.

            enzymes which cut 2 times :

            enzyme2     :   position1, position2.
            ...

        Sections appear in increasing order of the number of cuts, and only
        for counts that at least one enzyme actually has.

        Arguments:
         - ls is a tuple or list of results, i.e. (enzyme, positions) pairs.
           It is not modified.  If it is empty, the title alone is returned.
         - title is a string.
         - nc and s1 are not used: non cutting enzymes are not included.
        """
        if not ls:
            return title
        # Bucket the results by number of cuts.  Grouping (instead of
        # counting up from 1) avoids an empty "cut 1 times" section when no
        # enzyme cuts exactly once, and leaves the caller's data untouched.
        by_count = {}
        for name, sites in ls:
            by_count.setdefault(len(sites), []).append((name, sites))
        for count in sorted(by_count):
            title += "\n\nenzymes which cut %i times :\n\n" % count
            title = self.__next_section(by_count[count], title)
        return title

    def _make_map_only(self, ls, title, nc=(), s1=""):
        """Make string describing cutting map (PRIVATE).

        Return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        The sequence is drawn in rows of 60 bases.  Above each row, every cut
        position falling in that row gets a line with the position and the
        enzyme names, followed by a line of vertical bars marking the cuts
        reported so far in the row.  Below the two strands, a ruler gives the
        first and last position of the row.

        The instance attribute ``sequence`` must be set before calling this
        method; its length, its string form and its complement() are used.

        Arguments:
         - ls is a list of results, i.e. (enzyme, positions) pairs.
           If it is empty, the title alone is returned.
         - title is a string.
         - nc and s1 are not used: non cutting enzymes are not included.
        """
        if not ls:
            return title
        width = 60  # number of bases drawn on each row of the map
        bar = "|"
        # Group the enzyme names by cut position, so enzymes cutting at the
        # same position share one marker line.
        enzymemap = {}
        for enzyme, cut in ls:
            for c in cut:
                enzymemap.setdefault(c, []).append(str(enzyme))
        pending = sorted(enzymemap)
        length = len(self.sequence)
        sequence = str(self.sequence)
        revsequence = str(self.sequence.complement())
        parts = [title or ""]
        # max(length, 1) keeps a single (empty) row for an empty sequence.
        for start in range(0, max(length, 1), width):
            is_last = start + width >= length
            end = length if is_last else start + width
            if is_last:
                # The final row takes every position still pending.
                keys, pending = pending, []
            else:
                keys = [key for key in pending if key <= end]
                pending = [key for key in pending if key > end]
            line = " " * width
            for key in keys:
                names = "".join(" " + name for name in enzymemap[key])
                if key == end:
                    # A cut on the last base of the row: its column must be
                    # taken relative to the start of the row (key % width
                    # would be 0 for a full row).  It is the last marker of
                    # the row, hence the break.
                    chunk = line[: end - start - 1]
                    parts.append(chunk + str(key) + names + "\n")
                    parts.append(chunk + bar + "\n")
                    break
                k = key % width
                parts.append(line[: k - 1] + str(key) + names + "\n")
                # Remember the bar so later markers of this row show it too.
                line = line[: k - 1] + bar + line[k:]
                parts.append(line + "\n")
            ruler = (
                str(start + 1).ljust(15)
                + " " * (end - start - 30)
                + str(end).rjust(15)
            )
            parts.append(
                "\n".join(
                    (
                        sequence[start:end],
                        bar * (end - start),
                        revsequence[start:end],
                        ruler,
                    )
                )
                + "\n\n"
            )
        return "".join(parts)

    # private method to do lists:

    def __next_section(self, ls, into):
        """Next section (PRIVATE).

        Arguments:
         - ls is a tuple/list of tuple (string, [int, int]).
         - into is a string to which the formatted ls will be added.

        The entries of ls are sorted and each one is formatted as a line::

            enzyme1     :   position1.
            enzyme2     :   position2, position3.

        A list of positions too long for one line is broken after a comma and
        continued on lines indented by NameWidth + Indent spaces.

        Return into followed by the formatted lines.
        """
        continuation = "\n" + " " * (self.NameWidth + self.Indent)
        width = self.linesize - self.MaxSize
        # Matches the longest run of at most `width` characters that is
        # followed by a comma or by the final full stop.
        splitter = re.compile(r"([\w,\s()]){1,%i}[,\.]" % width)
        rows = [into]
        for enzyme, positions in sorted(ls):
            text = ", ".join(map(str, positions)) + "."
            if len(text) > width:
                # cut where appropriate and add the indentation
                text = continuation.join(m.group() for m in splitter.finditer(text))
            rows.append(str(enzyme).ljust(self.NameWidth) + " :  " + text + "\n")
        return "".join(rows)
