| """Unit tests for the lightweight evaluator pieces (ROUGE accuracy and text helpers). |
| The embedding/NLI metrics need model downloads, so they are validated manually, not in CI.""" |
|
|
| import numpy as np |
| from src.evaluator import accuracy_score, _split_sentences, _softmax |
|
|
|
|
class TestAccuracyScore:
    """Sanity checks for ``accuracy_score`` on small hand-written string pairs."""

    def test_identical_text_scores_high(self):
        """A text compared against itself must score above 0.9."""
        sentence = "the cat sat on the mat"
        score = accuracy_score(sentence, sentence)
        assert score > 0.9

    def test_unrelated_text_scores_low(self):
        """Two texts with no shared wording must score below 0.3."""
        score = accuracy_score("quantum physics equations", "a recipe for pasta")
        assert score < 0.3

    def test_empty_inputs_return_zero(self):
        """An empty string in either argument position must yield exactly 0.0."""
        pairs = (
            ("", "something"),
            ("something", ""),
        )
        for first, second in pairs:
            assert accuracy_score(first, second) == 0.0

    def test_score_is_bounded(self):
        """A partially overlapping pair must produce a score within [0, 1]."""
        score = accuracy_score("partial overlap here", "some partial overlap")
        assert score >= 0.0
        assert score <= 1.0
|
|
|
|
class TestSplitSentences:
    """Checks for the ``_split_sentences`` text helper."""

    def test_splits_on_punctuation(self):
        """Text ending its sentences with '.', '!' and '?' must yield three items."""
        text = "This is the first sentence. Here is the second one! And this is the third question?"
        pieces = _split_sentences(text)
        assert len(pieces) == 3

    def test_drops_tiny_fragments(self):
        """The short leading "Ok." must be discarded; only the long sentence remains."""
        expected = ["This is a long enough sentence to keep."]
        result = _split_sentences("Ok. This is a long enough sentence to keep.")
        assert result == expected
|
|
|
|
class TestSoftmax:
    """Checks for the ``_softmax`` numeric helper."""

    def test_rows_sum_to_one(self):
        """The output for a single-row 2-D input must sum to one within 1e-6."""
        logits = np.array([[1.0, 2.0, 3.0]])
        total = _softmax(logits).sum()
        assert abs(total - 1.0) < 1e-6

    def test_handles_1d_input(self):
        """A 1-D input of two zeros must come back with shape (1, 2) and sum to one."""
        logits = np.array([0.0, 0.0])
        result = _softmax(logits)
        assert result.shape == (1, 2)
        deviation = abs(result.sum() - 1.0)
        assert deviation < 1e-6
|
|