#!/usr/bin/env python
# encoding: utf-8

"""Module contains common utilities used in automated code tests for Gensim modules.

Attributes:

module_path : str
    Full path to this module directory.

common_texts : list of list of str
    Toy dataset.

common_dictionary : :class:`~gensim.corpora.dictionary.Dictionary`
    Dictionary of toy dataset.

common_corpus : list of list of (int, int)
    Corpus of toy dataset.


Examples:

It's easy to keep objects in a temporary folder and reuse them if needed:

.. sourcecode:: pycon

    >>> from gensim.models import word2vec
    >>> from gensim.test.utils import get_tmpfile, common_texts
    >>>
    >>> model = word2vec.Word2Vec(common_texts, min_count=1)
    >>> temp_path = get_tmpfile('toy_w2v')
    >>> model.save(temp_path)
    >>>
    >>> new_model = word2vec.Word2Vec.load(temp_path)
    >>> result = new_model.wv.most_similar("human", topn=1)

Let's print the first document in the toy dataset and then rebuild its corpus entry using the dictionary.

.. sourcecode:: pycon

    >>> from gensim.test.utils import common_texts, common_dictionary, common_corpus
    >>> print(common_texts[0])
    ['human', 'interface', 'computer']
    >>> assert common_dictionary.doc2bow(common_texts[0]) == common_corpus[0]

We can find our toy dataset in the test data directory.

.. sourcecode:: pycon

    >>> from gensim.test.utils import datapath
    >>>
    >>> with open(datapath("testcorpus.txt")) as f:
    ...     texts = [line.strip().split() for line in f]
    >>> print(texts[0])
    ['computer', 'human', 'interface']

If you don't need to keep temporary objects on disk, use :func:`~gensim.test.utils.temporary_file`:

.. sourcecode:: pycon

    >>> from gensim.test.utils import temporary_file, common_corpus, common_dictionary
    >>> from gensim.models import LdaModel
    >>>
    >>> with temporary_file("temp.txt") as tf:
    ...     lda = LdaModel(common_corpus, id2word=common_dictionary, num_topics=3)
    ...     lda.save(tf)

"""

import contextlib
import tempfile
import os
import shutil

from gensim.corpora import Dictionary
from gensim.utils import simple_preprocess

# Directory containing this module; needed because the sample data files are located in the same folder.
module_path = os.path.dirname(__file__)


def datapath(fname):
    """Build the path of file `fname` inside the ``test_data`` directory located next to this module.

    The path is only assembled; this function does not check that the file exists.

    Parameters
    ----------
    fname : str
        Name of file.

    Returns
    -------
    str
        Path to `fname` in the ``test_data`` folder.

    Example
    -------
    Let's load the ``testcorpus.mm`` test file and iterate over its documents.

    .. sourcecode:: pycon

        >>> from gensim.corpora import MmCorpus
        >>> from gensim.test.utils import datapath
        >>>
        >>> corpus = MmCorpus(datapath("testcorpus.mm"))
        >>> for document in corpus:
        ...     pass

    """
    # Resolve the data folder first, then append the requested file name to it.
    test_data_dir = os.path.join(module_path, 'test_data')
    return os.path.join(test_data_dir, fname)


def get_tmpfile(suffix):
    """Get a path to a file named `suffix` inside a newly created temporary directory.

    Every call creates a fresh temporary directory, so consecutive calls return different paths.
    The file itself is not created (only its name is generated), and the directory is not
    removed by this function.

    Parameters
    ----------
    suffix : str
        Name of the file to place in the temporary directory.

    Returns
    -------
    str
        Path to file `suffix` in a temporary folder.

    Examples
    --------
    Using this function we may get a path to a temporary file and use it, for example, to store a temporary model.

    .. sourcecode:: pycon

        >>> from gensim.models import LsiModel
        >>> from gensim.test.utils import get_tmpfile, common_dictionary, common_corpus
        >>>
        >>> tmp_f = get_tmpfile("toy_lsi_model")
        >>>
        >>> model = LsiModel(common_corpus, id2word=common_dictionary)
        >>> model.save(tmp_f)
        >>>
        >>> loaded_model = LsiModel.load(tmp_f)

    """
    fresh_dir = tempfile.mkdtemp()  # a brand-new directory per call is what makes the path unique
    tmp_path = os.path.join(fresh_dir, suffix)
    return tmp_path


@contextlib.contextmanager
def temporary_file(name=""):
    """Context manager that creates a temporary directory and yields the path of file `name` inside it.

    The file itself is not created. The temporary directory, together with everything placed
    in it, is deleted when the context exits, including when the body raises.

    Parameters
    ----------
    name : str
        Filename.

    Yields
    ------
    str
        Path to file `name` in the temporary directory.

    Examples
    --------
    This example demonstrates that the created temporary directory (and the files in it)
    is deleted at the end of the context.

    .. sourcecode:: pycon

        >>> import os
        >>> from gensim.test.utils import temporary_file
        >>> with temporary_file("temp.txt") as tf, open(tf, 'w') as outfile:
        ...     outfile.write("my extremely useful information")
        ...     print("Does this file exist? {}".format(os.path.exists(tf)))
        ...     print("Does this folder exist? {}".format(os.path.exists(os.path.dirname(tf))))
        Does this file exist? True
        Does this folder exist? True
        >>>
        >>> print("Does this file exist? {}".format(os.path.exists(tf)))
        Does this file exist? False
        >>> print("Does this folder exist? {}".format(os.path.exists(os.path.dirname(tf))))
        Does this folder exist? False

    """

    # NOTE: tempfile.TemporaryDirectory could be used here once python2.7 support is dropped.
    scratch_dir = tempfile.mkdtemp()
    try:
        # The join stays inside the try block so the directory is removed even if it fails.
        target = os.path.join(scratch_dir, name)
        yield target
    finally:
        # Best-effort cleanup: errors while removing the directory are deliberately ignored.
        shutil.rmtree(scratch_dir, ignore_errors=True)


# Toy dataset used in testing ("Deerwester" from the web tutorial).
# Each string below is one document; splitting on whitespace gives its list of tokens.
common_texts = [
    document.split()
    for document in (
        'human interface computer',
        'survey user computer system response time',
        'eps user interface system',
        'system human system eps',
        'user response time',
        'trees',
        'graph trees',
        'graph minors trees',
        'graph minors survey',
    )
]

# Dictionary built from the toy dataset.
common_dictionary = Dictionary(common_texts)
# Corpus of the toy dataset: `doc2bow` applied to every document, in the same order as `common_texts`.
common_corpus = list(map(common_dictionary.doc2bow, common_texts))


class LeeCorpus:
    """Iterable over the ``lee_background.cor`` file from the test data directory.

    The file is re-opened on every iteration, so an instance can be iterated more than once.
    """

    def __iter__(self):
        """Yield the output of :func:`~gensim.utils.simple_preprocess` for each line of the file."""
        corpus_file = datapath('lee_background.cor')
        # NOTE(review): the file is opened with the platform default encoding - confirm this is intended.
        with open(corpus_file) as handle:
            for raw_line in handle:
                processed = simple_preprocess(raw_line)
                yield processed


# Eagerly materialise the whole Lee corpus: the file is read and preprocessed when this module is imported.
lee_corpus_list = list(LeeCorpus())
