Spaces:
Sleeping
Sleeping
| """Tests unitaires pour le module picarones.core.metrics.""" | |
| import pytest | |
| from picarones.core.metrics import aggregate_metrics, compute_metrics, MetricsResult | |
class TestComputeMetrics:
    """Checks compute_metrics against inputs whose expected scores are known."""

    def test_perfect_match(self):
        """CER and WER must be 0 when reference and hypothesis are identical."""
        result = compute_metrics("Bonjour le monde", "Bonjour le monde")
        assert result.cer == pytest.approx(0.0)
        assert result.wer == pytest.approx(0.0)
        assert result.error is None

    def test_complete_mismatch(self):
        """CER is 1 when every reference character is substituted."""
        result = compute_metrics("abc", "xyz")
        # Three substitutions over three reference characters: a bare
        # ``> 0.0`` check would also accept a nearly perfect transcription.
        assert result.cer == pytest.approx(1.0)
        assert result.error is None

    def test_empty_reference(self):
        """Empty reference with a non-empty hypothesis yields CER = 1.0."""
        result = compute_metrics("", "quelque chose")
        assert result.cer == pytest.approx(1.0)

    def test_empty_both(self):
        """Empty reference and empty hypothesis yield CER = 0.0."""
        result = compute_metrics("", "")
        assert result.cer == pytest.approx(0.0)

    def test_single_substitution(self):
        """One substitution over 4 characters yields CER = 0.25."""
        result = compute_metrics("abcd", "abce")
        assert result.cer == pytest.approx(0.25)

    def test_case_insensitive_cer(self):
        """The caseless CER ignores differences in letter case."""
        result = compute_metrics("Bonjour", "bonjour")
        assert result.cer_caseless == pytest.approx(0.0)
        # The raw CER must still be > 0 (B != b).
        assert result.cer > 0.0

    def test_nfc_normalization(self):
        """The NFC CER treats canonically equivalent Unicode sequences as equal."""
        # "é" can be encoded composed (U+00E9) or decomposed (e + U+0301).
        composed = "\u00e9"  # é (NFC)
        decomposed = "e\u0301"  # e + combining accent (NFD)
        result = compute_metrics(composed, decomposed)
        # After NFC normalization both strings are identical, so cer_nfc = 0.
        assert result.cer_nfc == pytest.approx(0.0)

    def test_wer_one_word_wrong(self):
        """WER = 1/3 when one word out of three is wrong."""
        result = compute_metrics("le chat dort", "le chien dort")
        assert result.wer == pytest.approx(1 / 3, rel=1e-2)

    def test_result_has_lengths(self):
        """The result exposes the reference length and a positive hypothesis length."""
        ref = "Texte de référence"
        result = compute_metrics(ref, "Texte différent")
        assert result.reference_length == len(ref)
        assert result.hypothesis_length > 0

    def test_metrics_result_as_dict(self):
        """as_dict() must contain every expected metric key."""
        result = compute_metrics("abc", "abc")
        d = result.as_dict()
        expected_keys = [
            "cer", "cer_nfc", "cer_caseless", "wer", "wer_normalized", "mer", "wil",
        ]
        # Collect all absent keys so a failure reports them together instead
        # of stopping at the first one.
        missing = [key for key in expected_keys if key not in d]
        assert not missing, f"missing keys in as_dict(): {missing}"

    def test_cer_percent_property(self):
        """cer_percent expresses the CER as a percentage (0.25 -> 25.0)."""
        result = compute_metrics("abcd", "abce")
        assert result.cer_percent == pytest.approx(25.0, rel=1e-2)

    # -- Bug fix: empty hypothesis -> CER must be 1.0, not 0.0 (sprint 13 bug) --

    def test_empty_hypothesis_cer_is_one(self):
        """An empty hypothesis with a non-empty reference must give CER = 1.0."""
        result = compute_metrics("Bonjour le monde", "")
        assert result.cer == pytest.approx(1.0), (
            f"CER attendu 1.0 pour hypothèse vide, obtenu {result.cer}"
        )
        assert result.error is None

    def test_empty_hypothesis_wer_is_one(self):
        """WER must be 1.0 for an empty hypothesis (no ZeroDivisionError)."""
        result = compute_metrics("hello world", "")
        assert result.wer == pytest.approx(1.0)
        assert result.mer == pytest.approx(1.0)
        assert result.wil == pytest.approx(1.0)
        assert result.error is None

    def test_empty_hypothesis_whitespace_is_treated_as_empty(self):
        """A whitespace-only hypothesis is treated as empty."""
        result = compute_metrics("Bonjour", " ")
        assert result.cer == pytest.approx(1.0)

    def test_empty_hypothesis_hypothesis_length_is_zero(self):
        """hypothesis_length must be 0 for an empty hypothesis."""
        result = compute_metrics("Bonjour le monde", "")
        assert result.hypothesis_length == 0
class TestAggregateMetrics:
    """Checks aggregate_metrics over lists of MetricsResult objects."""

    def _make_result(self, cer: float) -> MetricsResult:
        """Build a MetricsResult whose seven rate fields all equal ``cer``."""
        rate_names = (
            "cer", "cer_nfc", "cer_caseless",
            "wer", "wer_normalized", "mer", "wil",
        )
        rates = dict.fromkeys(rate_names, cer)
        return MetricsResult(reference_length=100, hypothesis_length=100, **rates)

    def test_empty_list(self):
        """Aggregating an empty list returns an empty dict."""
        aggregated = aggregate_metrics([])
        assert aggregated == {}

    def test_single_result(self):
        """With one result, mean, min and max of the CER all equal that result's CER."""
        aggregated = aggregate_metrics([self._make_result(0.1)])
        cer_stats = aggregated["cer"]
        for stat in ("mean", "min", "max"):
            assert cer_stats[stat] == pytest.approx(0.1)

    def test_multiple_results(self):
        """Two successful results are averaged and counted, with no failures."""
        first = self._make_result(0.1)
        second = self._make_result(0.3)
        aggregated = aggregate_metrics([first, second])
        assert aggregated["cer"]["mean"] == pytest.approx(0.2)
        assert aggregated["document_count"] == 2
        assert aggregated["failed_count"] == 0

    def test_failed_results_excluded(self):
        """A result carrying an error is counted as failed and left out of the mean."""
        succeeded = self._make_result(0.1)
        errored = MetricsResult(
            cer=1.0,
            cer_nfc=1.0,
            cer_caseless=1.0,
            wer=1.0,
            wer_normalized=1.0,
            mer=1.0,
            wil=1.0,
            reference_length=50,
            hypothesis_length=0,
            error="Moteur en erreur",
        )
        aggregated = aggregate_metrics([succeeded, errored])
        # Aggregated metrics only cover results that have no error.
        assert aggregated["cer"]["mean"] == pytest.approx(0.1)
        assert aggregated["failed_count"] == 1