| """Tests for the evaluation framework.""" |
|
|
| import pytest |
| from src.evaluation.gleu_scorer import GLEUScorer |
|
|
|
|
def test_gleu_scorer_instantiation():
    """Smoke test: constructing a GLEUScorer with no arguments yields an object."""
    # Building the scorer with default arguments must not raise, and the
    # resulting instance must be a real object rather than None.
    assert GLEUScorer() is not None
|
|
|
|
def test_gleu_perfect_score():
    """Predictions that match their references exactly must score above 90."""
    sentences = ["The cat sat on the mat.", "Hello world."]
    # Hand the scorer two independent lists with identical content, one as
    # the predictions and one as the references.
    gleu = GLEUScorer().compute_gleu(list(sentences), list(sentences))
    assert gleu > 90.0
|
|
|
|
def test_gleu_empty_input():
    """With no predictions and no references the scorer must return exactly 0.0."""
    gleu_scorer = GLEUScorer()
    empty_result = gleu_scorer.compute_gleu([], [])
    assert empty_result == 0.0
|
|
|
|
def test_awl_coverage_score():
    """Check that AWL coverage of a sample sentence is a fraction in [0, 1].

    A five-word word list is written to a throwaway file, which is then used
    to build an ``AWLLoader`` and a ``StyleFingerprinter``; both are handed to
    a ``StyleEvaluator`` whose ``awl_coverage`` result is range-checked.
    """
    import tempfile
    from pathlib import Path

    from src.evaluation.style_metrics import StyleEvaluator
    from src.style.fingerprinter import StyleFingerprinter
    from src.vocabulary.awl_loader import AWLLoader

    # The temporary directory (and the word list inside it) is removed when
    # the ``with`` block exits, whether the assertions pass or fail, so no
    # manual ``os.unlink`` bookkeeping is needed.
    with tempfile.TemporaryDirectory() as tmp_dir:
        awl_file = Path(tmp_dir) / "awl.txt"
        awl_file.write_text(
            "analysis\nresearch\nmethod\nsignificant\nestablish\n",
            encoding="utf-8",
        )
        # Pass a plain string path, as the original test did.
        awl_path = str(awl_file)

        awl = AWLLoader(primary_path=awl_path, synonyms_path=None)
        fp = StyleFingerprinter(spacy_model="en_core_web_sm", awl_path=awl_path)
        evaluator = StyleEvaluator(fp, awl)
        coverage = evaluator.awl_coverage(
            "The analysis shows significant research results."
        )
        assert 0.0 <= coverage <= 1.0
|
|