"""Unit tests for the lightweight evaluator pieces (ROUGE accuracy and text helpers).

The embedding/NLI metrics need model downloads, so they are validated manually,
not in CI.
"""

import numpy as np

from src.evaluator import accuracy_score, _split_sentences, _softmax


class TestAccuracyScore:
    """Threshold and boundary checks for ``accuracy_score``."""

    def test_identical_text_scores_high(self):
        """Two identical strings must score above 0.9."""
        sentence = "the cat sat on the mat"
        assert accuracy_score(sentence, sentence) > 0.9

    def test_unrelated_text_scores_low(self):
        """Two strings with no shared words must score below 0.3."""
        first = "quantum physics equations"
        second = "a recipe for pasta"
        assert accuracy_score(first, second) < 0.3

    def test_empty_inputs_return_zero(self):
        """An empty string in either argument position yields exactly 0.0."""
        for first, second in (("", "something"), ("something", "")):
            assert accuracy_score(first, second) == 0.0

    def test_score_is_bounded(self):
        """A partially overlapping pair scores within the closed range [0, 1]."""
        score = accuracy_score("partial overlap here", "some partial overlap")
        assert 0.0 <= score <= 1.0


class TestSplitSentences:
    """Checks for the ``_split_sentences`` text helper."""

    def test_splits_on_punctuation(self):
        """Text ending its sentences with '.', '!' and '?' yields three items."""
        text = "This is the first sentence. Here is the second one! And this is the third question?"
        assert len(_split_sentences(text)) == 3

    def test_drops_tiny_fragments(self):
        """The short leading "Ok." is discarded; only the long sentence remains."""
        # fragments <= 15 chars are dropped
        text = "Ok. This is a long enough sentence to keep."
        expected = ["This is a long enough sentence to keep."]
        assert _split_sentences(text) == expected


class TestSoftmax:
    """Checks for the ``_softmax`` numeric helper."""

    def test_rows_sum_to_one(self):
        """A single-row 2-D input produces probabilities summing to 1."""
        logits = np.array([[1.0, 2.0, 3.0]])
        result = _softmax(logits)
        assert abs(result.sum() - 1.0) < 1e-6

    def test_handles_1d_input(self):
        """A 1-D input of two values comes back with shape (1, 2) and sums to 1."""
        logits = np.array([0.0, 0.0])
        result = _softmax(logits)
        assert result.shape == (1, 2)
        assert abs(result.sum() - 1.0) < 1e-6