Spaces:
Running
Running
"""Sprint 52 tests — readability metrics (Flesch).

Covers:

1. ``count_syllables_word``: heuristic on varied cases
   (empty words, no vowel, diacritics, multi-syllable).
2. ``count_words`` / ``count_sentences``: simple tokenisation,
   handling of text without final punctuation.
3. ``flesch_score``:
   - empty text → 0
   - score bounded in [0, 100]
   - consistency: simple sentence > complex sentence
   - FR vs EN difference (distinct coefficients)
4. ``flesch_delta``:
   - GT = OCR → 0
   - modernised OCR (LLM) → positive delta
   - degraded OCR (broken characters) → zero or negative delta
     expected (the test currently only asserts the bounds)
5. **Realistic use case**: a long, complex historical GT vs a
   simplified OCR/LLM → clearly positive delta (>10 pts).
6. Guard rails: invalid language, texts containing only
   punctuation.
7. Registration in the typed Sprint 34 registry — the
   ``(TEXT, TEXT)`` junction does return ``flesch_delta_fr`` and
   ``flesch_delta_en``.
"""
| from __future__ import annotations | |
| import pytest | |
| from picarones.evaluation.metric_registry import select_metrics | |
| from picarones.domain.artifacts import ArtifactType | |
| from picarones.evaluation.metrics.readability import ( | |
| count_sentences, | |
| count_syllables, | |
| count_syllables_word, | |
| count_words, | |
| flesch_delta, | |
| flesch_score, | |
| ) | |
# ──────────────────────────────────────────────────────────────────────────
# 1. Syllable counter
# ──────────────────────────────────────────────────────────────────────────
class TestSyllableCounting:
    """Exercise the syllable-counting heuristic on single words and texts.

    The accented fixtures ("é", "école", "été") are written as real
    Unicode characters: a mis-decoded literal contains no accented vowel
    at all and would silently test a different input.
    """

    def test_empty_word_returns_zero(self) -> None:
        """An empty word has zero syllables."""
        assert count_syllables_word("") == 0

    def test_word_without_vowel_returns_one(self) -> None:
        """A word without any vowel still counts as one syllable."""
        # Convention: a vowel-less word counts for at least one syllable
        # (useful for acronyms such as "BNF", "ALTO").
        assert count_syllables_word("BNF") == 1
        assert count_syllables_word("xyz") == 1

    def test_single_vowel(self) -> None:
        """A lone vowel, accented or not, is one syllable."""
        assert count_syllables_word("a") == 1
        assert count_syllables_word("é") == 1

    def test_simple_words(self) -> None:
        """Each group of consecutive vowels counts as one syllable."""
        assert count_syllables_word("chat") == 1  # 1 group: a
        assert count_syllables_word("chien") == 1  # 1 group: ie
        assert count_syllables_word("école") == 3  # é-o-e

    def test_diacritics_counted_as_vowels(self) -> None:
        """Accented vowels must be recognised as vowels."""
        accented = count_syllables_word("été")
        unaccented = count_syllables_word("ete")
        # Same vowel groups, hence the same number of syllables.
        assert accented == unaccented

    def test_count_syllables_sums_over_words(self) -> None:
        """The text-level count is the sum of the per-word counts."""
        text = "le chat noir"
        expected = sum(
            count_syllables_word(word) for word in ("le", "chat", "noir")
        )
        assert count_syllables(text) == expected
# ──────────────────────────────────────────────────────────────────────────
# 2. Word / sentence counting
# ──────────────────────────────────────────────────────────────────────────
class TestTokenCounting:
    """Check ``count_words`` and ``count_sentences`` on short inputs."""

    def test_empty_text(self) -> None:
        """Empty input yields zero words and zero sentences."""
        blank = ""
        assert count_words(blank) == 0
        assert count_sentences(blank) == 0

    def test_simple_words(self) -> None:
        """Three space-separated words are counted as three."""
        n_words = count_words("le chat noir")
        assert n_words == 3

    def test_apostrophe_treated_as_word_char(self) -> None:
        """An elided form such as "l'amour" is a single token."""
        # "l'amour" could count as 1 or 2 depending on the convention.
        # This pins the actual behaviour (1 token) as the reference;
        # the exact choice matters little as long as it stays consistent.
        n_words = count_words("l'amour")
        assert n_words == 1

    def test_sentence_split_basic(self) -> None:
        """Full stops delimit sentences."""
        sample = "Premier. Deuxième. Troisième."
        assert count_sentences(sample) == 3

    def test_sentence_split_with_question_and_exclam(self) -> None:
        """Exclamation and question marks also end sentences."""
        sample = "Allez ! Vraiment ? Oui."
        assert count_sentences(sample) == 3

    def test_no_final_punctuation_counts_as_one(self) -> None:
        """Text without a terminator still counts as one sentence."""
        # Counting one sentence here avoids a division by zero in the
        # Flesch computation.
        sample = "texte sans point final"
        assert count_sentences(sample) == 1
# ──────────────────────────────────────────────────────────────────────────
# 3. Flesch score
# ──────────────────────────────────────────────────────────────────────────
class TestFleschScore:
    """Check ``flesch_score``: bounds, ordering, language handling, guards.

    The French fixtures are written with real accented characters so the
    scored text is the intended one; mis-decoded accents would change the
    word and syllable counts fed to the formula.
    """

    def test_empty_text_returns_zero(self) -> None:
        """An empty text scores 0 in both supported languages."""
        assert flesch_score("", lang="fr") == 0.0
        assert flesch_score("", lang="en") == 0.0

    def test_score_is_bounded(self) -> None:
        """Very simple and very complex texts both stay within [0, 100]."""
        # Very simple sentences.
        simple_score = flesch_score("Le chat. Le chien.", lang="fr")
        assert 0.0 <= simple_score <= 100.0

        # Very complex sentence (long words, a single sentence).
        complex_score = flesch_score(
            "L'établissement de l'historiographie médiévale "
            "contemporaine présente d'importantes difficultés "
            "épistémologiques",
            lang="fr",
        )
        assert 0.0 <= complex_score <= 100.0

    def test_simple_higher_than_complex(self) -> None:
        """A simple text must score higher than a complex one."""
        simple = "Le chat est noir. Le chien est blanc."
        complex_text = (
            "L'établissement de l'historiographie médiévale "
            "contemporaine présente d'importantes difficultés "
            "épistémologiques pour les chercheurs spécialisés."
        )
        assert flesch_score(simple, "fr") > flesch_score(complex_text, "fr")

    def test_fr_and_en_differ(self) -> None:
        """FR and EN scores differ on a text of intermediate complexity."""
        # On a text that saturates neither at 0 nor at 100, FR and EN
        # give different scores: distinct coefficients on the
        # syllables/words ratio (73.6 FR vs 84.6 EN).
        text = (
            "Le chat noir traverse la rue. Le chien blanc dort sous "
            "l arbre. Les amis jouent ensemble dans le jardin pendant "
            "que le soleil brille au dessus de la colline."
        )
        score_fr = flesch_score(text, "fr")
        score_en = flesch_score(text, "en")

        # Both scores must sit in the non-saturated range and differ
        # because of the coefficients.
        assert 0.0 < score_fr < 100.0
        assert 0.0 < score_en < 100.0
        assert score_fr != score_en

    def test_invalid_lang_raises(self) -> None:
        """An unsupported language code raises ``ValueError``."""
        with pytest.raises(ValueError, match="Langue"):
            flesch_score("test", lang="es")  # type: ignore[arg-type]

    def test_only_punctuation_returns_zero(self) -> None:
        """A text made only of punctuation scores 0."""
        assert flesch_score("...!!!???", lang="fr") == 0.0
# ──────────────────────────────────────────────────────────────────────────
# 4-5. Flesch delta
# ──────────────────────────────────────────────────────────────────────────
class TestFleschDelta:
    """Check ``flesch_delta`` between a ground truth (GT) and an OCR text.

    The French fixtures use real accented characters; the modernisation
    test compares against a numeric threshold, so the scored text must be
    the intended one rather than a mis-decoded variant.
    """

    def test_identical_texts_zero_delta(self) -> None:
        """Identical GT and OCR give a delta of exactly 0."""
        text = "Le chat est noir. Le chien est blanc."
        assert flesch_delta(text, text, "fr") == 0.0

    def test_empty_texts_zero_delta(self) -> None:
        """Two empty texts give a delta of 0."""
        assert flesch_delta("", "", "fr") == 0.0

    def test_realistic_modernization_yields_positive_delta(self) -> None:
        """Key use case: an LLM modernises a historical text.

        This must give a clear positive signal for the
        over-normalisation detector.
        """
        gt_old = (
            "Je vous envoie cette missive afin de vous informer "
            "de la situation à la cour, où plusieurs nouvelles "
            "méritent votre attention."
        )
        ocr_modern = (
            "Je vous écris cette lettre pour vous parler de la "
            "situation à la cour. Plusieurs nouvelles sont importantes."
        )
        delta = flesch_delta(gt_old, ocr_modern, "fr")
        # A modernising LLM must produce a clearly positive delta
        # (shorter sentences + simpler words).
        assert delta > 10.0, f"Delta attendu > 10 pts, obtenu {delta:.1f}"

    def test_degraded_ocr_yields_negative_or_zero_delta(self) -> None:
        """Degraded OCR: the delta is expected to be zero or negative.

        Insertions/substitutions break the sentences, so readability is
        expected to drop.
        """
        gt = "Le chat est noir. Le chien est blanc. Les amis jouent."
        ocr_garbled = "L3 ch4t 35t n0ir. L3 ch13n 35t bl4nc. L35 4mi5 jou3nt."
        # The behaviour varies with the kind of degradation, so this only
        # checks that the gap stays bounded.
        # NOTE(review): the test name promises a non-positive delta but
        # only the bounds are asserted — tighten once the expected sign
        # is confirmed.
        delta = flesch_delta(gt, ocr_garbled, "fr")
        assert -100.0 <= delta <= 100.0

    def test_delta_is_bounded(self) -> None:
        """The delta stays within [-100, 100] in both directions."""
        # Extreme cases: short simple words vs one 200-character token.
        short_text = "a b c."
        long_token = "x" * 200
        forward = flesch_delta(short_text, long_token, "fr")
        backward = flesch_delta(long_token, short_text, "fr")
        assert -100.0 <= forward <= 100.0
        assert -100.0 <= backward <= 100.0
# ──────────────────────────────────────────────────────────────────────────
# 7. Typed registry integration (Sprint 34)
# ──────────────────────────────────────────────────────────────────────────
class TestRegistryIntegration:
    """Check that the Flesch deltas are exposed through the metric registry.

    The French fixture uses real accented characters rather than a
    mis-decoded variant, so the compared text is the intended one.
    """

    def test_flesch_metrics_registered_for_text_text(self) -> None:
        """Both Flesch delta metrics are selected for ``(TEXT, TEXT)``."""
        # Force the import that populates the registry.
        import picarones.evaluation.metrics.readability  # noqa: F401

        junction = (ArtifactType.TEXT, ArtifactType.TEXT)
        names = {spec.name for spec in select_metrics(junction)}
        assert "flesch_delta_fr" in names
        assert "flesch_delta_en" in names

    def test_registered_function_returns_same_as_direct_call(self) -> None:
        """The registered FR metric matches a direct ``flesch_delta`` call."""
        from picarones.evaluation.metric_registry import compute_at_junction

        gt = "Je vous envoie cette missive afin de vous informer."
        ocr = "Je vous écris une lettre. Voici la situation."
        out = compute_at_junction(
            gt, ocr, (ArtifactType.TEXT, ArtifactType.TEXT),
        )
        # The registered FR delta must match the direct call.
        expected = flesch_delta(gt, ocr, "fr")
        assert out["flesch_delta_fr"] == pytest.approx(expected, abs=1e-9)