# Copyright 2012 by Wibowo Arindrarto.  All rights reserved.
#
# This file is part of the Biopython distribution and governed by your
# choice of the "Biopython License Agreement" or the "BSD 3-Clause License".
# Please see the LICENSE file that should have been included as part of this
# package.

"""Bio.SearchIO parser for BLAST+ tab output format, with or without comments."""

import re

from Bio.SearchIO._index import SearchIndexer
from Bio.SearchIO._model import Hit
from Bio.SearchIO._model import HSP
from Bio.SearchIO._model import HSPFragment
from Bio.SearchIO._model import QueryResult

__all__ = ("BlastTabIndexer", "BlastTabParser", "BlastTabWriter")


# Long-name -> short-name map.
# Keys are the column names as they appear in the "# Fields:" comment line of
# a commented tabular file; values are the corresponding short names (the ones
# used on the BLAST command line). The parser uses this map to translate a
# "# Fields:" line, and the writer inverts it to emit one.
# NOTE(review): the two coverage labels at the end may not match the labels
# printed by every BLAST+ release -- confirm against real output.
_LONG_SHORT_MAP = {
    "query id": "qseqid",
    "query acc.": "qacc",
    "query acc.ver": "qaccver",
    "query length": "qlen",
    "subject id": "sseqid",
    "subject acc.": "sacc",
    "subject acc.ver": "saccver",
    "subject length": "slen",
    "alignment length": "length",
    "bit score": "bitscore",
    "score": "score",
    "evalue": "evalue",
    "identical": "nident",
    "% identity": "pident",
    "positives": "positive",
    "% positives": "ppos",
    "mismatches": "mismatch",
    "gaps": "gaps",
    "q. start": "qstart",
    "q. end": "qend",
    "s. start": "sstart",
    "s. end": "send",
    "query frame": "qframe",
    "sbjct frame": "sframe",
    "query/sbjct frames": "frames",
    "query seq": "qseq",
    "subject seq": "sseq",
    "gap opens": "gapopen",
    "query gi": "qgi",
    "subject ids": "sallseqid",
    "subject gi": "sgi",
    "subject gis": "sallgi",
    "BTOP": "btop",
    "subject accs.": "sallacc",
    "subject tax ids": "staxids",
    "subject sci names": "sscinames",
    "subject com names": "scomnames",
    "subject blast names": "sblastnames",
    "subject super kingdoms": "sskingdoms",
    "subject title": "stitle",
    "subject titles": "salltitles",
    "subject strand": "sstrand",
    "% subject coverage": "qcovs",
    "% hsp coverage": "qcovhsp",
}


# function to create a list from semicolon-delimited string
# used in BlastTabParser._parse_result_row
def _list_semicol(s):
    return s.split(";")


def _list_diamond(s):
    return s.split("<>")


# Column -> class attribute maps.
# Each map is keyed by a column short name; the value is a 2-tuple of
# (attribute name on the target object, callable used to cast the raw string).
# There is one map per object level that a parsed row is split into.

# columns stored on the QueryResult object
_COLUMN_QRESULT = {
    "qseqid": ("id", str),
    "qacc": ("accession", str),
    "qaccver": ("accession_version", str),
    "qlen": ("seq_len", int),
    "qgi": ("gi", str),
}
# columns stored on the Hit object
_COLUMN_HIT = {
    "sseqid": ("id", str),
    "sallseqid": ("id_all", _list_semicol),
    "sacc": ("accession", str),
    "saccver": ("accession_version", str),
    "sallacc": ("accession_all", _list_semicol),
    "sgi": ("gi", str),
    "sallgi": ("gi_all", str),
    "slen": ("seq_len", int),
    "staxids": ("tax_ids", _list_semicol),
    "sscinames": ("sci_names", _list_semicol),
    "scomnames": ("com_names", _list_semicol),
    "sblastnames": ("blast_names", _list_semicol),
    "sskingdoms": ("super_kingdoms", _list_semicol),
    "stitle": ("title", str),
    "salltitles": ("title_all", _list_diamond),
    # set strand as HSP property?
    "sstrand": ("strand", str),
    "qcovs": ("query_coverage", float),
}
# columns stored on the HSP object
_COLUMN_HSP = {
    "bitscore": ("bitscore", float),
    "score": ("bitscore_raw", int),
    "evalue": ("evalue", float),
    "nident": ("ident_num", int),
    "pident": ("ident_pct", float),
    "positive": ("pos_num", int),
    "ppos": ("pos_pct", float),
    "mismatch": ("mismatch_num", int),
    "gaps": ("gap_num", int),
    "gapopen": ("gapopen_num", int),
    "btop": ("btop", str),
    "qcovhsp": ("query_coverage", float),
}
# columns stored on the HSPFragment object
_COLUMN_FRAG = {
    "length": ("aln_span", int),
    "qstart": ("query_start", int),
    "qend": ("query_end", int),
    "sstart": ("hit_start", int),
    "send": ("hit_end", int),
    "qframe": ("query_frame", int),
    "sframe": ("hit_frame", int),
    "frames": ("frames", str),
    "qseq": ("query", str),
    "sseq": ("hit", str),
}
# union of every short name that appears in one of the four maps above
_SUPPORTED_FIELDS = set(
    list(_COLUMN_QRESULT) + list(_COLUMN_HIT) + list(_COLUMN_HSP) + list(_COLUMN_FRAG)
)

# column order in the non-commented tabular output variant
# values must be keys inside the column-attribute maps above
# (this list is also what the 'std' field keyword is expanded to)
_DEFAULT_FIELDS = [
    "qseqid",
    "sseqid",
    "pident",
    "length",
    "mismatch",
    "gapopen",
    "qstart",
    "qend",
    "sstart",
    "send",
    "evalue",
    "bitscore",
]
# one field from each of the following sets must exist in order for the
# parser to work: they provide the query ID and the hit ID, respectively
_MIN_QUERY_FIELDS = {"qseqid", "qacc", "qaccver"}
_MIN_HIT_FIELDS = {"sseqid", "sacc", "saccver", "sallseqid"}

# Helpers below compute BLAST HSP attributes that can be derived when
# certain other attributes are present.
# This was previously implemented in the HSP objects in the old model

# matches a word character immediately followed by a '-' character; each match
# is counted as one gap opening by _compute_gapopen_num
_RE_GAPOPEN = re.compile(r"\w-")


def _compute_gapopen_num(hsp):
    """Return the number of gap openings in the given HSP (PRIVATE).

    The count is the total number of ``_RE_GAPOPEN`` matches found in the
    query sequence plus those found in the hit sequence.
    """
    return sum(
        len(_RE_GAPOPEN.findall(str(getattr(hsp, seq_type).seq)))
        for seq_type in ("query", "hit")
    )


def _augment_blast_hsp(hsp, attr):
    """Calculate the given HSP attribute, for writing (PRIVATE)."""
    if not hasattr(hsp, attr) and not attr.endswith("_pct"):
        # aln_span is number of identical matches + mismatches + gaps
        if attr == "aln_span":
            hsp.aln_span = hsp.ident_num + hsp.mismatch_num + hsp.gap_num

        # ident and gap requires the num values to be computed first
        elif attr.startswith("ident"):
            setattr(hsp, attr, hsp.aln_span - hsp.mismatch_num - hsp.gap_num)
        elif attr.startswith("gap"):
            setattr(hsp, attr, hsp.aln_span - hsp.ident_num - hsp.mismatch_num)

        elif attr == "mismatch_num":
            setattr(hsp, attr, hsp.aln_span - hsp.ident_num - hsp.gap_num)

        elif attr == "gapopen_num":
            if not hasattr(hsp, "query") or not hasattr(hsp, "hit"):
                raise AttributeError
            hsp.gapopen_num = _compute_gapopen_num(hsp)

    # if the attr is a percent value, calculate it
    if attr == "ident_pct":
        hsp.ident_pct = hsp.ident_num / hsp.aln_span * 100

    elif attr == "pos_pct":
        hsp.pos_pct = hsp.pos_num / hsp.aln_span * 100

    elif attr == "gap_pct":
        hsp.gap_pct = hsp.gap_num / hsp.aln_span * 100


class BlastTabParser:
    """Parser for the BLAST tabular format."""

    def __init__(self, handle, comments=False, fields=_DEFAULT_FIELDS):
        """Initialize the class.

        Stores the handle and the comments flag, validates ``fields`` and
        reads the first line of the handle so iteration can start from it.
        """
        self.handle = handle
        self.has_comments = comments
        self.fields = self._prep_fields(fields)
        first_line = self.handle.readline()
        self.line = first_line.strip()

    def __iter__(self):
        """Iterate over BlastTabParser, yields query results."""
        # stop iteration if file has no lines
        if not self.line:
            return
        # determine which iterator to use
        elif self.has_comments:
            iterfunc = self._parse_commented_qresult
        else:
            if self.line.startswith("#"):
                raise ValueError(
                    "Encountered unexpected character '#' at the beginning of a line. "
                    "Set comments=True if the file is a commented file."
                )
            iterfunc = self._parse_qresult

        yield from iterfunc()

    def _prep_fields(self, fields):
        """Validate and format the given fields for use by the parser (PRIVATE).

        :param fields: column short names, either as a whitespace-separated
            string or as an iterable of strings
        :returns: a new list of column short names with the first ``std``
            entry (if any) expanded to the default column names
        :raises ValueError: if no query ID field or no hit ID field is present
        """
        if isinstance(fields, str):
            # split on any run of whitespace so that repeated spaces do not
            # produce empty field names
            fields = fields.split()
        else:
            # copy into a list: tuples can then be concatenated below, and
            # the caller's sequence (e.g. the module default) is never shared
            fields = list(fields)
        # blast allows 'std' as a proxy for the standard default lists
        # we want to transform 'std' to its proper column names
        if "std" in fields:
            idx = fields.index("std")
            fields = fields[:idx] + _DEFAULT_FIELDS + fields[idx + 1 :]
        # at least one query ID field and one hit ID field are required
        field_set = set(fields)
        if not field_set & _MIN_QUERY_FIELDS or not field_set & _MIN_HIT_FIELDS:
            raise ValueError("Required query and/or hit ID field not found.")

        return fields

    def _parse_commented_qresult(self):
        """Yield ``QueryResult`` objects from a commented file (PRIVATE)."""
        while True:
            comments = self._parse_comments()
            if comments:
                try:
                    self.fields = comments["fields"]
                    # iterator for the query results
                    qres_iter = self._parse_qresult()
                except KeyError:
                    # no fields means the query has no results
                    assert "fields" not in comments
                    # create an iterator returning one empty qresult
                    # if the query has no results
                    qres_iter = iter([QueryResult()])

                for qresult in qres_iter:
                    for key, value in comments.items():
                        setattr(qresult, key, value)
                    yield qresult

            else:
                break

    def _parse_comments(self):
        """Return a dictionary containing tab file comments (PRIVATE)."""
        comments = {}
        while True:
            # parse program and version
            # example: # BLASTX 2.2.26+
            if "BLAST" in self.line and "processed" not in self.line:
                program_line = self.line[len(" #") :].split(" ")
                comments["program"] = program_line[0].lower()
                comments["version"] = program_line[1]
            # parse query id and description (if available)
            # example: # Query: gi|356995852 Mus musculus POU domain
            elif "Query" in self.line:
                query_line = self.line[len("# Query: ") :].split(" ", 1)
                comments["id"] = query_line[0]
                if len(query_line) == 2:
                    comments["description"] = query_line[1]
            # parse target database
            # example: # Database: db/minirefseq_protein
            elif "Database" in self.line:
                comments["target"] = self.line[len("# Database: ") :]
            # parse RID (from remote searches)
            elif "RID" in self.line:
                comments["rid"] = self.line[len("# RID: ") :]
            # parse column order, required for parsing the result lines
            # example: # Fields: query id, query gi, query acc., query length
            elif "Fields" in self.line:
                comments["fields"] = self._parse_fields_line()
            # if the line has these strings, it's either the end of a comment
            # or the end of a file, so we return all the comments we've parsed
            elif " hits found" in self.line or "processed" in self.line:
                self.line = self.handle.readline().strip()
                return comments

            self.line = self.handle.readline()

            if not self.line:
                return comments
            else:
                self.line = self.line.strip()

    def _parse_fields_line(self):
        """Return column short names line from 'Fields' comment line (PRIVATE).

        The current line is expected to be a ``# Fields: ...`` comment whose
        column long names are separated by ``", "``. Each known long name is
        translated to its short name. A long name that is not known is kept
        verbatim instead of raising ``KeyError``, so that its column is
        treated like any other unsupported column by the row parser.

        :returns: list of column names, validated by ``_prep_fields``
        :raises ValueError: if no query ID or hit ID column is present
        """
        raw_field_str = self.line[len("# Fields: ") :]
        long_fields = raw_field_str.split(", ")
        fields = [_LONG_SHORT_MAP.get(long_name, long_name) for long_name in long_fields]
        return self._prep_fields(fields)

    def _parse_result_row(self):
        """Return a dictionary of parsed row values (PRIVATE).

        The current line is split on tabs and each column is stored, under
        its attribute name and after casting, in the dictionary of the object
        level it belongs to. The returned dictionary has the keys
        ``qresult``, ``hit``, ``hsp`` and ``frag``.

        Raises ``ValueError`` if the number of columns differs from the
        number of expected fields.
        """
        fields = self.fields
        columns = self.line.strip().split("\t")
        n_fields, n_columns = len(fields), len(columns)
        if n_fields != n_columns:
            raise ValueError("Expected %i columns, found: %i" % (n_fields, n_columns))

        parsed = {"qresult": {}, "hit": {}, "hsp": {}, "frag": {}}
        # pair each container with the mapping that describes its columns
        targets = (
            (parsed["qresult"], _COLUMN_QRESULT),
            (parsed["hit"], _COLUMN_HIT),
            (parsed["hsp"], _COLUMN_HSP),
            (parsed["frag"], _COLUMN_FRAG),
        )
        for sname, value in zip(fields, columns):
            handled = False
            for container, mapping in targets:
                if sname not in mapping:
                    continue
                attr_name, caster = mapping[sname]
                # raw values are already strings, so str needs no casting
                if caster is not str:
                    value = caster(value)
                container[attr_name] = value
                handled = True
            # make sure that any unhandled field is not supported
            if not handled:
                assert sname not in _SUPPORTED_FIELDS

        return parsed

    def _get_id(self, parsed):
        """Return the value used for a QueryResult or Hit ID from a parsed row (PRIVATE)."""
        # use 'id', with 'id_all', 'accession' and 'accession_version'
        # fallbacks one of these must have a value since we've checked whether
        # they exist or not when parsing the comments
        id_cache = parsed.get("id")
        if id_cache is None and "id_all" in parsed:
            id_cache = parsed.get("id_all")[0]
        if id_cache is None:
            id_cache = parsed.get("accession")
        if id_cache is None:
            id_cache = parsed.get("accession_version")

        return id_cache

    def _parse_qresult(self):
        """Yield QueryResult objects (PRIVATE).

        Rows are read starting from the current line. Parsing runs one line
        behind reading: the objects for a row are only created once the
        following line has been examined, because that is how the end of a
        hit or of a query result is detected. Reading stops at an empty line
        (as returned at end of file) or at a line starting with ``#``.
        """
        # state values, used to determine what to do with each line
        # (only state_EOF, state_QRES_NEW and state_HIT_NEW are tested below)
        state_EOF = 0
        state_QRES_NEW = 1
        state_QRES_SAME = 3
        state_HIT_NEW = 2
        state_HIT_SAME = 4
        # dummies for initial states
        qres_state = None
        hit_state = None
        file_state = None
        cur_qid = None
        cur_hid = None
        # dummies for initial id caches
        prev_qid = None
        prev_hid = None
        # dummies for initial parsed value containers
        cur, prev = None, None
        # HSPs of the hit being built, and hits of the qresult being built
        hit_list, hsp_list = [], []

        while True:
            # store previous line's parsed values if we've past the first line
            if cur is not None:
                prev = cur
                prev_qid = cur_qid
                prev_hid = cur_hid
            # only parse the line if it's not EOF or not a comment line
            if self.line and not self.line.startswith("#"):
                cur = self._parse_result_row()
                cur_qid = self._get_id(cur["qresult"])
                cur_hid = self._get_id(cur["hit"])
            else:
                file_state = state_EOF
                # mock values for cur_qid and cur_hid since the line is empty
                cur_qid, cur_hid = None, None

            # get the state of hit and qresult
            if prev_qid != cur_qid:
                qres_state = state_QRES_NEW
            else:
                qres_state = state_QRES_SAME
            # new hits are hits with different id or hits in a new qresult
            if prev_hid != cur_hid or qres_state == state_QRES_NEW:
                hit_state = state_HIT_NEW
            else:
                hit_state = state_HIT_SAME

            # we're creating objects for the previously parsed line(s),
            # so nothing is done in the first parsed line (prev == None)
            if prev is not None:
                # every line is essentially an HSP with one fragment, so we
                # create both of these for every line
                frag = HSPFragment(prev_hid, prev_qid)
                for attr, value in prev["frag"].items():
                    # adjust coordinates to Python range
                    # NOTE: this requires both start and end coords to be
                    # present, otherwise a KeyError will be raised.
                    # Without this limitation, we might misleadingly set the
                    # start / end coords
                    for seq_type in ("query", "hit"):
                        if attr == seq_type + "_start":
                            # smaller coordinate, shifted down by one
                            value = min(value, prev["frag"][seq_type + "_end"]) - 1
                        elif attr == seq_type + "_end":
                            # larger coordinate, left unshifted
                            value = max(value, prev["frag"][seq_type + "_start"])
                    setattr(frag, attr, value)
                # strand and frame setattr require the full parsed values
                # to be set first
                for seq_type in ("hit", "query"):
                    # try to set hit and query frame
                    frame = self._get_frag_frame(frag, seq_type, prev["frag"])
                    setattr(frag, "%s_frame" % seq_type, frame)
                    # try to set hit and query strand
                    strand = self._get_frag_strand(frag, seq_type, prev["frag"])
                    setattr(frag, "%s_strand" % seq_type, strand)

                hsp = HSP([frag])
                for attr, value in prev["hsp"].items():
                    setattr(hsp, attr, value)
                hsp_list.append(hsp)

                # create hit and append to temp hit container if hit_state
                # says we're not at the same hit or at a new query
                if hit_state == state_HIT_NEW:
                    hit = Hit(hsp_list)
                    for attr, value in prev["hit"].items():
                        if attr != "id_all":
                            setattr(hit, attr, value)
                        else:
                            # not setting hit ID since it's already set from the
                            # prev_hid above
                            # (every item after the first is stored in _id_alt)
                            setattr(hit, "_id_alt", value[1:])
                    hit_list.append(hit)
                    hsp_list = []
                # create qresult and yield if we're at a new qresult or EOF
                if qres_state == state_QRES_NEW or file_state == state_EOF:
                    qresult = QueryResult(hit_list, prev_qid)
                    for attr, value in prev["qresult"].items():
                        setattr(qresult, attr, value)
                    yield qresult
                    # if current line is EOF, break
                    if file_state == state_EOF:
                        break
                    hit_list = []

            # advance to the next line, stripped of surrounding whitespace
            self.line = self.handle.readline().strip()

    def _get_frag_frame(self, frag, seq_type, parsedict):
        """Return fragment frame for given object (PRIVATE).

        Returns ``HSPFragment`` frame given the object, its sequence type,
        and its parsed dictionary values.
        """
        assert seq_type in ("query", "hit")
        frame = getattr(frag, "%s_frame" % seq_type, None)
        if frame is not None:
            return frame
        else:
            if "frames" in parsedict:
                # frames is 'x1/x2' string, x1 is query frame, x2 is hit frame
                idx = 0 if seq_type == "query" else 1
                return int(parsedict["frames"].split("/")[idx])
            # else implicit None return

    def _get_frag_strand(self, frag, seq_type, parsedict):
        """Return fragment strand for given object (PRIVATE).

        Returns ``HSPFragment`` strand given the object, its sequence type,
        and its parsed dictionary values.
        """
        # NOTE: this will never set the strands as 0 for protein
        # queries / hits, since we can't detect the blast flavors
        # from the columns alone.
        assert seq_type in ("query", "hit")
        strand = getattr(frag, "%s_strand" % seq_type, None)
        if strand is not None:
            return strand
        else:
            # using parsedict instead of the fragment object since
            # we need the unadjusted coordinated values
            start = parsedict.get("%s_start" % seq_type)
            end = parsedict.get("%s_end" % seq_type)
            if start is not None and end is not None:
                return 1 if start <= end else -1
            # else implicit None return


class BlastTabIndexer(SearchIndexer):
    """Indexer class for BLAST+ tab output."""

    _parser = BlastTabParser

    def __init__(self, filename, comments=False, fields=_DEFAULT_FIELDS):
        """Initialize the class.

        :param filename: name of the BLAST tabular file to index
        :param comments: whether the file is a commented file
        :param fields: column short names, either as a whitespace-separated
            string or as an iterable of strings; ``std`` stands for the
            default columns
        :raises ValueError: if the file is not commented and ``fields`` has
            none of ``qseqid``, ``qacc`` or ``qaccver``
        """
        SearchIndexer.__init__(self, filename, comments=comments, fields=fields)

        # if the file doesn't have comments,
        # get index of column used as the key (qseqid / qacc / qaccver)
        if not self._kwargs["comments"]:
            # normalize into a list of column names first: on a plain string
            # the membership test and index() would work on characters, not
            # on columns
            if isinstance(fields, str):
                columns = fields.split()
            else:
                columns = list(fields)
            # 'std' occupies as many columns as there are default fields
            if "std" in columns:
                idx = columns.index("std")
                columns = columns[:idx] + _DEFAULT_FIELDS + columns[idx + 1 :]

            for key_field in ("qseqid", "qacc", "qaccver"):
                if key_field in columns:
                    self._key_idx = columns.index(key_field)
                    break
            else:
                raise ValueError(
                    "Custom fields is missing an ID column. One of these must be "
                    "present: 'qseqid', 'qacc', or 'qaccver'."
                )

    def __iter__(self):
        """Iterate over the file handle; yields key, start offset, and length."""
        handle = self._handle
        handle.seek(0)

        if not self._kwargs["comments"]:
            iterfunc = self._qresult_index
        else:
            iterfunc = self._qresult_index_commented

        for key, offset, length in iterfunc():
            yield key.decode(), offset, length

    def _qresult_index_commented(self):
        """Indexer for commented BLAST tabular files (PRIVATE)."""
        handle = self._handle
        handle.seek(0)
        start_offset = 0
        # mark of a new query
        query_mark = None
        # mark of the query's ID
        qid_mark = b"# Query: "
        # mark of the last line
        end_mark = b"# BLAST processed"

        while True:
            end_offset = handle.tell()
            line = handle.readline()

            if query_mark is None:
                query_mark = line
                start_offset = end_offset
            elif line.startswith(qid_mark):
                qresult_key = line[len(qid_mark) :].split()[0]
            elif line == query_mark or line.startswith(end_mark):
                yield qresult_key, start_offset, end_offset - start_offset
                start_offset = end_offset
            elif not line:
                break

    def _qresult_index(self):
        """Indexer for noncommented BLAST tabular files (PRIVATE)."""
        handle = self._handle
        handle.seek(0)
        start_offset = 0
        qresult_key = None
        key_idx = self._key_idx

        while True:
            # get end offset here since we only know a qresult ends after
            # encountering the next one
            end_offset = handle.tell()
            # line = handle.readline()
            line = handle.readline()

            if qresult_key is None:
                qresult_key = line.split(b"\t")[key_idx]
            else:
                try:
                    curr_key = line.split(b"\t")[key_idx]
                except IndexError:
                    curr_key = b""

                if curr_key != qresult_key:
                    yield qresult_key, start_offset, end_offset - start_offset
                    qresult_key = curr_key
                    start_offset = end_offset

            # break if we've reached EOF
            if not line:
                break

    def get_raw(self, offset):
        """Return the raw bytes string of a QueryResult object from the given offset."""
        if self._kwargs["comments"]:
            getfunc = self._get_raw_qresult_commented
        else:
            getfunc = self._get_raw_qresult

        return getfunc(offset)

    def _get_raw_qresult(self, offset):
        """Return the raw bytes string of a single QueryResult from a noncommented file (PRIVATE)."""
        handle = self._handle
        handle.seek(offset)
        qresult_raw = b""
        key_idx = self._key_idx
        qresult_key = None

        while True:
            line = handle.readline()
            # get the key if the first line (qresult key)
            if qresult_key is None:
                qresult_key = line.split(b"\t")[key_idx]
            else:
                try:
                    curr_key = line.split(b"\t")[key_idx]
                except IndexError:
                    curr_key = b""
                # only break when qresult is finished (key is different)
                if curr_key != qresult_key:
                    break
            # append to the raw string as long as qresult is the same
            qresult_raw += line

        return qresult_raw

    def _get_raw_qresult_commented(self, offset):
        """Return the bytes raw string of a single QueryResult from a commented file (PRIVATE)."""
        handle = self._handle
        handle.seek(offset)
        qresult_raw = b""
        end_mark = b"# BLAST processed"

        # query mark is the line marking a new query
        # something like '# TBLASTN 2.2.25+'
        query_mark = None
        line = handle.readline()
        while line:
            # since query_mark depends on the BLAST search, we need to obtain it
            # first
            if query_mark is None:
                query_mark = line
            # break when we've reached the next qresult or the search ends
            elif line == query_mark or line.startswith(end_mark):
                break

            qresult_raw += line
            line = handle.readline()

        return qresult_raw


class BlastTabWriter:
    """Writer for blast-tab output format."""

    def __init__(self, handle, comments=False, fields=_DEFAULT_FIELDS):
        """Initialize the class."""
        self.handle = handle
        self.has_comments = comments
        self.fields = fields

    def write_file(self, qresults):
        """Write to the handle, return how many QueryResult objects were written."""
        handle = self.handle
        qresult_counter, hit_counter, hsp_counter, frag_counter = 0, 0, 0, 0

        for qresult in qresults:
            if self.has_comments:
                handle.write(self._build_comments(qresult))
            if qresult:
                handle.write(self._build_rows(qresult))
                if not self.has_comments:
                    qresult_counter += 1
                hit_counter += len(qresult)
                hsp_counter += sum(len(hit) for hit in qresult)
                frag_counter += sum(len(hit.fragments) for hit in qresult)
            # if it's commented and there are no hits in the qresult, we still
            # increment the counter
            if self.has_comments:
                qresult_counter += 1

        # commented files have a line saying how many queries were processed
        if self.has_comments:
            handle.write("# BLAST processed %i queries" % qresult_counter)

        return qresult_counter, hit_counter, hsp_counter, frag_counter

    def _build_rows(self, qresult):
        """Return a string containing tabular rows of the QueryResult object (PRIVATE).

        One tab-separated, newline-terminated row is produced per HSP, with
        one column per supported field in ``self.fields``. Fields that are
        not supported are left out of the row.
        """
        coordinates = {"qstart", "qend", "sstart", "send"}
        rows = []
        for hit in qresult:
            for hsp in hit:
                cells = []
                for field in self.fields:
                    # the column value could either be an attribute of
                    # qresult, hit, or hsp
                    if field in _COLUMN_QRESULT:
                        value = getattr(qresult, _COLUMN_QRESULT[field][0])
                    elif field in _COLUMN_HIT:
                        hit_attr = (
                            "id_all" if field == "sallseqid" else _COLUMN_HIT[field][0]
                        )
                        value = getattr(hit, hit_attr)
                    # special case, since 'frames' can be determined from
                    # query frame and hit frame
                    elif field == "frames":
                        value = "%i/%i" % (hsp.query_frame, hsp.hit_frame)
                    elif field in _COLUMN_HSP:
                        hsp_attr = _COLUMN_HSP[field][0]
                        try:
                            value = getattr(hsp, hsp_attr)
                        except AttributeError:
                            # compute the missing attribute, then read it
                            _augment_blast_hsp(hsp, hsp_attr)
                            value = getattr(hsp, hsp_attr)
                    elif field in _COLUMN_FRAG:
                        value = getattr(hsp, _COLUMN_FRAG[field][0])
                    else:
                        assert field not in _SUPPORTED_FIELDS
                        continue

                    # adjust from and to according to strand, if from and to
                    # is included in the output field
                    if field in coordinates:
                        value = self._adjust_coords(field, value, hsp)
                    # adjust output formatting
                    cells.append(self._adjust_output(field, value))

                rows.append("\t".join(cells) + "\n")

        return "".join(rows)

    def _adjust_coords(self, field, value, hsp):
        """Adjust start and end coordinates according to strand (PRIVATE)."""
        assert field in ("qstart", "qend", "sstart", "send")
        # determine sequence type to operate on based on field's first letter
        seq_type = "query" if field.startswith("q") else "hit"

        strand = getattr(hsp, "%s_strand" % seq_type, None)
        if strand is None:
            raise ValueError(
                "Required attribute %r not found." % ("%s_strand" % (seq_type))
            )
        # switch start <--> end coordinates if strand is -1
        if strand < 0:
            if field.endswith("start"):
                value = getattr(hsp, "%s_end" % seq_type)
            elif field.endswith("end"):
                value = getattr(hsp, "%s_start" % seq_type) + 1
        elif field.endswith("start"):
            # adjust start coordinate for positive strand
            value += 1

        return value

    def _adjust_output(self, field, value):
        """Adjust formatting of given field and value to mimic native tab output (PRIVATE)."""
        # qseq and sseq are stored as SeqRecord, but here we only need the str
        if field in ("qseq", "sseq"):
            value = str(value.seq)

        # evalue formatting, adapted from BLAST+ source:
        # src/objtools/align_format/align_format_util.cpp#L668
        elif field == "evalue":
            if value < 1.0e-180:
                value = "0.0"
            elif value < 1.0e-99:
                value = "%2.0e" % value
            elif value < 0.0009:
                value = "%3.0e" % value
            elif value < 0.1:
                value = "%4.3f" % value
            elif value < 1.0:
                value = "%3.2f" % value
            elif value < 10.0:
                value = "%2.1f" % value
            else:
                value = "%5.0f" % value

        # pident and ppos formatting
        elif field in ("pident", "ppos"):
            value = "%.2f" % value

        # evalue formatting, adapted from BLAST+ source:
        # src/objtools/align_format/align_format_util.cpp#L723
        elif field == "bitscore":
            if value > 9999:
                value = "%4.3e" % value
            elif value > 99.9:
                value = "%4.0d" % value
            else:
                value = "%4.1f" % value

        # coverages have no comma (using floats still ~ a more proper
        # representation)
        elif field in ("qcovhsp", "qcovs"):
            value = "%.0f" % value

        # list into '<>'-delimited string
        elif field == "salltitles":
            value = "<>".join(value)

        # list into ';'-delimited string
        elif field in (
            "sallseqid",
            "sallacc",
            "staxids",
            "sscinames",
            "scomnames",
            "sblastnames",
            "sskingdoms",
        ):
            value = ";".join(value)

        # everything else
        else:
            value = str(value)

        return value

    def _build_comments(self, qres):
        """Return QueryResult tabular comment as a string (PRIVATE).

        The comment block consists of the program line, the query line, an
        optional RID line, the database line, the fields line (only for
        query results with hits) and the hit count line, each terminated by
        a newline.
        """
        # short name -> long name, the inverse of the long-short name map,
        # required for writing the fields line
        long_names = {short: long_name for long_name, short in _LONG_SHORT_MAP.items()}

        lines = []

        # the version is left out of the program line when it is not set
        program = qres.program.upper()
        if hasattr(qres, "version"):
            lines.append(f"# {program} {qres.version}")
        else:
            lines.append("# %s" % program)

        # description may or may not be None
        if qres.description is not None:
            lines.append(f"# Query: {qres.id} {qres.description}")
        else:
            lines.append("# Query: %s" % qres.id)

        # the RID line is only written if the attribute is present
        if hasattr(qres, "rid"):
            lines.append("# RID: %s" % qres.rid)

        lines.append("# Database: %s" % qres.target)

        # qresults without hits don't show the Fields comment
        if qres:
            field_names = ", ".join(long_names[field] for field in self.fields)
            lines.append("# Fields: %s" % field_names)

        lines.append("# %i hits found" % len(qres))

        return "\n".join(lines) + "\n"


# When this file is executed directly (not imported as a module), run the
# doctests through the run_doctest helper from Bio._utils.
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()
