from nltk.translate.ribes_score import corpus_ribes, word_rank_alignment


def test_ribes_empty_worder():  # worder as in word order
    # Verifies that these two sentences have no alignment,
    # and hence have the lowest possible RIBES score.
    hyp = "This is a nice sentence which I quite like".split()
    ref = "Okay well that's neat and all but the reference's different".split()

    assert word_rank_alignment(ref, hyp) == []

    list_of_refs = [[ref]]
    hypotheses = [hyp]
    assert corpus_ribes(list_of_refs, hypotheses) == 0.0


def test_ribes_one_worder():
    # Verifies that these two sentences have just one match,
    # and the RIBES score for this sentence with very little
    # correspondence is 0.
    hyp = "This is a nice sentence which I quite like".split()
    ref = "Okay well that's nice and all but the reference's different".split()

    assert word_rank_alignment(ref, hyp) == [3]

    list_of_refs = [[ref]]
    hypotheses = [hyp]
    assert corpus_ribes(list_of_refs, hypotheses) == 0.0


def test_ribes_two_worder():
    # Verifies that these two sentences have two matches,
    # but still get the lowest possible RIBES score due
    # to the lack of similarity.
    hyp = "This is a nice sentence which I quite like".split()
    ref = "Okay well that's nice and all but the reference is different".split()

    assert word_rank_alignment(ref, hyp) == [9, 3]

    list_of_refs = [[ref]]
    hypotheses = [hyp]
    assert corpus_ribes(list_of_refs, hypotheses) == 0.0


def test_ribes():
    # Based on the doctest of the corpus_ribes function
    hyp1 = [
        "It",
        "is",
        "a",
        "guide",
        "to",
        "action",
        "which",
        "ensures",
        "that",
        "the",
        "military",
        "always",
        "obeys",
        "the",
        "commands",
        "of",
        "the",
        "party",
    ]
    ref1a = [
        "It",
        "is",
        "a",
        "guide",
        "to",
        "action",
        "that",
        "ensures",
        "that",
        "the",
        "military",
        "will",
        "forever",
        "heed",
        "Party",
        "commands",
    ]
    ref1b = [
        "It",
        "is",
        "the",
        "guiding",
        "principle",
        "which",
        "guarantees",
        "the",
        "military",
        "forces",
        "always",
        "being",
        "under",
        "the",
        "command",
        "of",
        "the",
        "Party",
    ]
    ref1c = [
        "It",
        "is",
        "the",
        "practical",
        "guide",
        "for",
        "the",
        "army",
        "always",
        "to",
        "heed",
        "the",
        "directions",
        "of",
        "the",
        "party",
    ]

    hyp2 = [
        "he",
        "read",
        "the",
        "book",
        "because",
        "he",
        "was",
        "interested",
        "in",
        "world",
        "history",
    ]
    ref2a = [
        "he",
        "was",
        "interested",
        "in",
        "world",
        "history",
        "because",
        "he",
        "read",
        "the",
        "book",
    ]

    list_of_refs = [[ref1a, ref1b, ref1c], [ref2a]]
    hypotheses = [hyp1, hyp2]

    score = corpus_ribes(list_of_refs, hypotheses)

    assert round(score, 4) == 0.3597


def test_no_zero_div():
    # Regression test for Issue 2529, assure that no ZeroDivisionError is thrown.
    hyp1 = [
        "It",
        "is",
        "a",
        "guide",
        "to",
        "action",
        "which",
        "ensures",
        "that",
        "the",
        "military",
        "always",
        "obeys",
        "the",
        "commands",
        "of",
        "the",
        "party",
    ]
    ref1a = [
        "It",
        "is",
        "a",
        "guide",
        "to",
        "action",
        "that",
        "ensures",
        "that",
        "the",
        "military",
        "will",
        "forever",
        "heed",
        "Party",
        "commands",
    ]
    ref1b = [
        "It",
        "is",
        "the",
        "guiding",
        "principle",
        "which",
        "guarantees",
        "the",
        "military",
        "forces",
        "always",
        "being",
        "under",
        "the",
        "command",
        "of",
        "the",
        "Party",
    ]
    ref1c = [
        "It",
        "is",
        "the",
        "practical",
        "guide",
        "for",
        "the",
        "army",
        "always",
        "to",
        "heed",
        "the",
        "directions",
        "of",
        "the",
        "party",
    ]

    hyp2 = ["he", "read", "the"]
    ref2a = ["he", "was", "interested", "in", "world", "history", "because", "he"]

    list_of_refs = [[ref1a, ref1b, ref1c], [ref2a]]
    hypotheses = [hyp1, hyp2]

    score = corpus_ribes(list_of_refs, hypotheses)

    assert round(score, 4) == 0.1688