import json
import textwrap
import copy
from collections import OrderedDict

import numpy as np
import yaml

__all__ = ['get_header_from_yaml', 'get_yaml_from_header', 'get_yaml_from_table']


class ColumnOrderList(list):
    """
    List of tuples that sorts in a specific order that makes sense for
    astropy table column attributes.
    """

    def sort(self, *args, **kwargs):
        super().sort()

        column_keys = ['name', 'unit', 'datatype', 'format', 'description', 'meta']
        in_dict = dict(self)
        out_list = []

        for key in column_keys:
            if key in in_dict:
                out_list.append((key, in_dict[key]))
        for key, val in self:
            if key not in column_keys:
                out_list.append((key, val))

        # Clear list in-place
        del self[:]

        self.extend(out_list)


class ColumnDict(dict):
    """
    Specialized dict subclass to represent attributes of a Column
    and return items() in a preferred order.  This is only for use
    in generating a YAML map representation that has a fixed order.
    """

    def items(self):
        """
        Return items as a ColumnOrderList, which sorts in the preferred
        way for column attributes.
        """
        return ColumnOrderList(super().items())


def _construct_odict(load, node):
    """
    Construct OrderedDict from !!omap in yaml safe load.

    Source: https://gist.github.com/weaver/317164
    License: Unspecified

    This is the same as SafeConstructor.construct_yaml_omap(),
    except the data type is changed to OrderedDict() and setitem is
    used instead of append in the loop

    Examples
    --------
    ::

      >>> yaml.load('''  # doctest: +SKIP
      ... !!omap
      ... - foo: bar
      ... - mumble: quux
      ... - baz: gorp
      ... ''')
      OrderedDict([('foo', 'bar'), ('mumble', 'quux'), ('baz', 'gorp')])

      >>> yaml.load('''!!omap [ foo: bar, mumble: quux, baz : gorp ]''')  # doctest: +SKIP
      OrderedDict([('foo', 'bar'), ('mumble', 'quux'), ('baz', 'gorp')])
    """
    omap = OrderedDict()
    yield omap
    if not isinstance(node, yaml.SequenceNode):
        raise yaml.constructor.ConstructorError(
            "while constructing an ordered map", node.start_mark,
            f"expected a sequence, but found {node.id}", node.start_mark)

    for subnode in node.value:
        if not isinstance(subnode, yaml.MappingNode):
            raise yaml.constructor.ConstructorError(
                "while constructing an ordered map", node.start_mark,
                f"expected a mapping of length 1, but found {subnode.id}",
                subnode.start_mark)

        if len(subnode.value) != 1:
            raise yaml.constructor.ConstructorError(
                "while constructing an ordered map", node.start_mark,
                f"expected a single mapping item, but found {len(subnode.value)} items",
                subnode.start_mark)

        key_node, value_node = subnode.value[0]
        key = load.construct_object(key_node)
        value = load.construct_object(value_node)
        omap[key] = value


def _repr_pairs(dump, tag, sequence, flow_style=None):
    """
    This is the same code as BaseRepresenter.represent_sequence(),
    but the value passed to dump.represent_data() in the loop is a
    dictionary instead of a tuple.

    Source: https://gist.github.com/weaver/317164
    License: Unspecified
    """
    value = []
    node = yaml.SequenceNode(tag, value, flow_style=flow_style)
    if dump.alias_key is not None:
        dump.represented_objects[dump.alias_key] = node
    best_style = True
    for (key, val) in sequence:
        item = dump.represent_data({key: val})
        if not (isinstance(item, yaml.ScalarNode) and not item.style):
            best_style = False
        value.append(item)
    if flow_style is None:
        if dump.default_flow_style is not None:
            node.flow_style = dump.default_flow_style
        else:
            node.flow_style = best_style
    return node


def _repr_odict(dumper, data):
    """
    Represent OrderedDict in yaml dump.

    Source: https://gist.github.com/weaver/317164
    License: Unspecified

    >>> data = OrderedDict([('foo', 'bar'), ('mumble', 'quux'), ('baz', 'gorp')])
    >>> yaml.dump(data, default_flow_style=False)  # doctest: +SKIP
    '!!omap\\n- foo: bar\\n- mumble: quux\\n- baz: gorp\\n'
    >>> yaml.dump(data, default_flow_style=True)  # doctest: +SKIP
    '!!omap [foo: bar, mumble: quux, baz: gorp]\\n'
    """
    return _repr_pairs(dumper, 'tag:yaml.org,2002:omap', data.items())


def _repr_column_dict(dumper, data):
    """
    Represent ColumnDict in yaml dump.

    This is the same as an ordinary mapping except that the keys
    are written in a fixed order that makes sense for astropy table
    columns.
    """
    return dumper.represent_mapping('tag:yaml.org,2002:map', data)


def _get_variable_length_array_shape(col):
    """Check if object-type ``col`` is really a variable length list.

    That is true if the object consists purely of list of nested lists, where
    the shape of every item can be represented as (m, n, ..., *) where the (m,
    n, ...) are constant and only the lists in the last axis have variable
    shape. If so the returned value of shape will be a tuple in the form (m, n,
    ..., None).

    If ``col`` is a variable length array then the return ``dtype`` corresponds
    to the type found by numpy for all the individual values. Otherwise it will
    be ``np.dtype(object)``.

    Parameters
    ==========
    col : column-like
        Input table column, assumed to be object-type

    Returns
    =======
    shape : tuple
        Inferred variable length shape or None
    dtype : np.dtype
        Numpy dtype that applies to col
    """
    class ConvertError(ValueError):
        """Local conversion error used below"""

    # Numpy types supported as variable-length arrays
    np_classes = (np.floating, np.integer, np.bool_, np.unicode_)

    try:
        if len(col) == 0 or not all(isinstance(val, np.ndarray) for val in col):
            raise ConvertError
        dtype = col[0].dtype
        shape = col[0].shape[:-1]
        for val in col:
            if not issubclass(val.dtype.type, np_classes) or val.shape[:-1] != shape:
                raise ConvertError
            dtype = np.promote_types(dtype, val.dtype)
        shape = shape + (None,)

    except ConvertError:
        # `col` is not a variable length array, return shape and dtype to
        #  the original. Note that this function is only called if
        #  col.shape[1:] was () and col.info.dtype is object.
        dtype = col.info.dtype
        shape = ()

    return shape, dtype


def _get_datatype_from_dtype(dtype):
    """Return string version of ``dtype`` for writing to ECSV ``datatype``"""
    datatype = dtype.name
    if datatype.startswith(('bytes', 'str')):
        datatype = 'string'
    if datatype.endswith('_'):
        datatype = datatype[:-1]  # string_ and bool_ lose the final _ for ECSV
    return datatype


def _get_col_attributes(col):
    """
    Extract information from a column (apart from the values) that is required
    to fully serialize the column.

    Parameters
    ----------
    col : column-like
        Input Table column

    Returns
    -------
    attrs : dict
        Dict of ECSV attributes for ``col``
    """
    dtype = col.info.dtype  # Type of column values that get written
    subtype = None  # Type of data for object columns serialized with JSON
    shape = col.shape[1:]  # Shape of multidim / variable length columns

    if dtype.name == 'object':
        if shape == ():
            # 1-d object type column might be a variable length array
            dtype = np.dtype(str)
            shape, subtype = _get_variable_length_array_shape(col)
        else:
            # N-d object column is subtype object but serialized as JSON string
            dtype = np.dtype(str)
            subtype = np.dtype(object)
    elif shape:
        # N-d column which is not object is serialized as JSON string
        dtype = np.dtype(str)
        subtype = col.info.dtype

    datatype = _get_datatype_from_dtype(dtype)

    # Set the output attributes
    attrs = ColumnDict()
    attrs['name'] = col.info.name
    attrs['datatype'] = datatype
    for attr, nontrivial, xform in (('unit', lambda x: x is not None, str),
                                    ('format', lambda x: x is not None, None),
                                    ('description', lambda x: x is not None, None),
                                    ('meta', lambda x: x, None)):
        col_attr = getattr(col.info, attr)
        if nontrivial(col_attr):
            attrs[attr] = xform(col_attr) if xform else col_attr

    if subtype:
        attrs['subtype'] = _get_datatype_from_dtype(subtype)
        # Numpy 'object' maps to 'subtype' of 'json' in ECSV
        if attrs['subtype'] == 'object':
            attrs['subtype'] = 'json'
    if shape:
        attrs['subtype'] += json.dumps(list(shape), separators=(',', ':'))

    return attrs


def get_yaml_from_table(table):
    """
    Return lines with a YAML representation of header content from the ``table``.

    Parameters
    ----------
    table : `~astropy.table.Table` object
        Table for which header content is output

    Returns
    -------
    lines : list
        List of text lines with YAML header content
    """

    header = {'cols': list(table.columns.values())}
    if table.meta:
        header['meta'] = table.meta

    return get_yaml_from_header(header)


def get_yaml_from_header(header):
    """
    Return lines with a YAML representation of header content from a Table.

    The ``header`` dict must contain these keys:

    - 'cols' : list of table column objects (required)
    - 'meta' : table 'meta' attribute (optional)

    Other keys included in ``header`` will be serialized in the output YAML
    representation.

    Parameters
    ----------
    header : dict
        Table header content

    Returns
    -------
    lines : list
        List of text lines with YAML header content
    """
    from astropy.io.misc.yaml import AstropyDumper

    class TableDumper(AstropyDumper):
        """
        Custom Dumper that represents OrderedDict as an !!omap object.
        """

        def represent_mapping(self, tag, mapping, flow_style=None):
            """
            This is a combination of the Python 2 and 3 versions of this method
            in the PyYAML library to allow the required key ordering via the
            ColumnOrderList object.  The Python 3 version insists on turning the
            items() mapping into a list object and sorting, which results in
            alphabetical order for the column keys.
            """
            value = []
            node = yaml.MappingNode(tag, value, flow_style=flow_style)
            if self.alias_key is not None:
                self.represented_objects[self.alias_key] = node
            best_style = True
            if hasattr(mapping, 'items'):
                mapping = mapping.items()
                if hasattr(mapping, 'sort'):
                    mapping.sort()
                else:
                    mapping = list(mapping)
                    try:
                        mapping = sorted(mapping)
                    except TypeError:
                        pass

            for item_key, item_value in mapping:
                node_key = self.represent_data(item_key)
                node_value = self.represent_data(item_value)
                if not (isinstance(node_key, yaml.ScalarNode) and not node_key.style):
                    best_style = False
                if not (isinstance(node_value, yaml.ScalarNode) and not node_value.style):
                    best_style = False
                value.append((node_key, node_value))
            if flow_style is None:
                if self.default_flow_style is not None:
                    node.flow_style = self.default_flow_style
                else:
                    node.flow_style = best_style
            return node

    TableDumper.add_representer(OrderedDict, _repr_odict)
    TableDumper.add_representer(ColumnDict, _repr_column_dict)

    header = copy.copy(header)  # Don't overwrite original
    header['datatype'] = [_get_col_attributes(col) for col in header['cols']]
    del header['cols']

    lines = yaml.dump(header, default_flow_style=None,
                      Dumper=TableDumper, width=130).splitlines()
    return lines


class YamlParseError(Exception):
    pass


def get_header_from_yaml(lines):
    """
    Get a header dict from input ``lines`` which should be valid YAML.  This
    input will typically be created by get_yaml_from_header.  The output is a
    dictionary which describes all the table and column meta.

    The get_cols() method in the io/ascii/ecsv.py file should be used as a
    guide to using the information when constructing a table using this
    header dict information.

    Parameters
    ----------
    lines : list
        List of text lines with YAML header content

    Returns
    -------
    header : dict
        Dictionary describing table and column meta

    """
    from astropy.io.misc.yaml import AstropyLoader

    class TableLoader(AstropyLoader):
        """
        Custom Loader that constructs OrderedDict from an !!omap object.
        This does nothing but provide a namespace for adding the
        custom odict constructor.
        """

    TableLoader.add_constructor('tag:yaml.org,2002:omap', _construct_odict)
    # Now actually load the YAML data structure into `meta`
    header_yaml = textwrap.dedent('\n'.join(lines))
    try:
        header = yaml.load(header_yaml, Loader=TableLoader)
    except Exception as err:
        raise YamlParseError() from err

    return header