File size: 1,546 Bytes
"""Tests for the evaluation framework."""
import pytest
from src.evaluation.gleu_scorer import GLEUScorer
def test_gleu_scorer_instantiation():
    """Verify that calling ``GLEUScorer()`` with no arguments yields an object."""
    gleu = GLEUScorer()
    assert gleu is not None
def test_gleu_perfect_score():
    """Check that predictions identical to their references score above 90.0."""
    sentences = ("The cat sat on the mat.", "Hello world.")
    # Build two separate lists so predictions and references stay distinct
    # objects with equal content.
    hypotheses = list(sentences)
    references = list(sentences)
    gleu = GLEUScorer().compute_gleu(hypotheses, references)
    assert gleu > 90.0  # Should be near-perfect
def test_gleu_empty_input():
    """Check that scoring empty prediction and reference lists gives 0.0."""
    no_predictions, no_references = [], []
    result = GLEUScorer().compute_gleu(no_predictions, no_references)
    assert result == 0.0
def test_awl_coverage_score():
    """Check that AWL coverage of a sample sentence lies between 0.0 and 1.0.

    A five-word list is written to a temporary file whose path is handed to
    both ``AWLLoader`` and ``StyleFingerprinter``.
    ``StyleEvaluator.awl_coverage`` is then called on one sentence and only
    the range of the result is asserted. The temporary file is removed
    afterwards, even if writing it or any later step fails.
    """
    # Imported inside the test, as before, so these modules are only loaded
    # when this test actually runs.
    import os
    import tempfile

    from src.vocabulary.awl_loader import AWLLoader
    from src.style.fingerprinter import StyleFingerprinter
    from src.evaluation.style_metrics import StyleEvaluator

    # delete=False keeps the file on disk after it is closed so it can be
    # reopened by path; the explicit encoding avoids depending on the locale.
    word_file = tempfile.NamedTemporaryFile(
        mode="w", suffix=".txt", delete=False, encoding="utf-8"
    )
    awl_path = word_file.name
    try:
        # Close the handle before the path is passed on, so the content is
        # flushed to disk.
        with word_file:
            word_file.write("analysis\nresearch\nmethod\nsignificant\nestablish\n")

        awl = AWLLoader(primary_path=awl_path, synonyms_path=None)
        fp = StyleFingerprinter(spacy_model="en_core_web_sm", awl_path=awl_path)
        evaluator = StyleEvaluator(fp, awl)
        coverage = evaluator.awl_coverage(
            "The analysis shows significant research results."
        )
        assert 0.0 <= coverage <= 1.0
    finally:
        # The try block now covers the write as well, so a failure there no
        # longer leaves the file behind.
        os.unlink(awl_path)
|