"""Tests for the evaluation framework."""

import pytest
from src.evaluation.gleu_scorer import GLEUScorer


def test_gleu_scorer_instantiation():
    """Constructing a GLEUScorer with no arguments yields an object."""
    instance = GLEUScorer()
    # Smoke check only: construction must succeed and not produce None.
    assert instance is not None


def test_gleu_perfect_score():
    """Identical predictions and references should score above 90."""
    sentences = ("The cat sat on the mat.", "Hello world.")
    # Build two separate lists so predictions and references are distinct
    # objects with equal contents.
    hypotheses = list(sentences)
    references = list(sentences)
    gleu = GLEUScorer().compute_gleu(hypotheses, references)
    # An exact match is expected to be near-perfect.
    assert gleu > 90.0


def test_gleu_empty_input():
    """Scoring empty prediction and reference lists returns 0.0."""
    no_preds, no_refs = [], []
    result = GLEUScorer().compute_gleu(no_preds, no_refs)
    assert result == 0.0


def test_awl_coverage_score():
    """Test AWL coverage scoring.

    Writes a five-word word list to a temporary file, builds a
    ``StyleEvaluator`` from an ``AWLLoader`` and a ``StyleFingerprinter``
    that both point at that file, and checks that ``awl_coverage`` returns a
    value in the closed interval [0.0, 1.0] for a sample sentence.
    """
    import os
    import tempfile

    from src.vocabulary.awl_loader import AWLLoader
    from src.style.fingerprinter import StyleFingerprinter
    from src.evaluation.style_metrics import StyleEvaluator

    # The directory context manager removes the word list on exit even when
    # writing the file or constructing the evaluator raises, so a failure at
    # any step cannot leave a stray temp file behind.
    with tempfile.TemporaryDirectory() as tmp_dir:
        awl_path = os.path.join(tmp_dir, "awl.txt")
        # Explicit encoding keeps the fixture independent of the locale.
        with open(awl_path, "w", encoding="utf-8") as f:
            f.write("analysis\nresearch\nmethod\nsignificant\nestablish\n")

        awl = AWLLoader(primary_path=awl_path, synonyms_path=None)
        fp = StyleFingerprinter(spacy_model="en_core_web_sm", awl_path=awl_path)
        evaluator = StyleEvaluator(fp, awl)
        coverage = evaluator.awl_coverage("The analysis shows significant research results.")
        # Only the range is checked here; the exact value is not pinned.
        assert 0.0 <= coverage <= 1.0