"""Tests for the rule engine in `6_recommendation_engine.py`.

The engine decides whether a "demande de localisation PAR" dossier is
complete. Nothing here goes through the LayoutLMv3 pipeline: each test
hand-builds `DocumentSummary` objects carrying synthetic field extractions
and calls the rule methods directly, which keeps the suite fast (~1 s once
the module is loaded).

The `reco_mod` and `engine_no_pipeline` fixtures are not defined in this
file (presumably they come from a conftest — confirm).
"""
from __future__ import annotations

import pytest


def _field(value, confidence=0.99):
    """Return one synthetic extraction entry in the `fields` dict format."""
    return {"value": value, "confidence": confidence}


# ──────────────────────────────────────────────────────────────────────────
# _norm_ref — separator strip + diacritic / digit-glyph folding
# ──────────────────────────────────────────────────────────────────────────

@pytest.mark.parametrize("raw, expected", [
    ("PC 044 035 25 00035", "PC0440352500035"),
    ("PC-044-035-25-00035", "PC0440352500035"),
    ("PC/044/035", "PC044035"),
    ("PC YOO65", "PCY0065"),  # O → 0 fold
    ("PCY0065", "PCY0065"),
    ("", ""),
    (None, ""),
])
def test_norm_ref(reco_mod, raw, expected):
    """`_norm_ref` maps each raw reference to its expected normalised form."""
    normalised = reco_mod._norm_ref(raw)
    assert normalised == expected


# ──────────────────────────────────────────────────────────────────────────
# _edit_distance — pure Levenshtein
# ──────────────────────────────────────────────────────────────────────────

@pytest.mark.parametrize("a, b, expected", [
    ("abc", "abc", 0),
    ("abc", "abd", 1),
    ("abc", "ab", 1),
    ("", "abc", 3),
    ("PC03306323Z0475", "PC0330632Z0475", 1),   # missing one digit
    ("PC03306323Z0475", "PC03306323Z0475", 0),  # identical
])
def test_edit_distance(reco_mod, a, b, expected):
    """`_edit_distance` returns the expected distance for each pair."""
    distance = reco_mod._edit_distance(a, b)
    assert distance == expected


# ──────────────────────────────────────────────────────────────────────────
# _autorisation_matches — tri-state (True / False / None)
# ──────────────────────────────────────────────────────────────────────────

def _doc(reco_mod, doc_class="Autorisation", ref=None):
    """Build a `DocumentSummary` that optionally carries a urbanisme reference.

    When `ref` is None the summary has an empty `fields` dict; otherwise it
    holds a single `Reference_Urbanisme` entry with confidence 0.99.
    """
    fields = {} if ref is None else {"Reference_Urbanisme": _field(ref)}
    return reco_mod.DocumentSummary(
        file=f"file_{doc_class}.pdf",
        doc_class=doc_class,
        doc_confidence=0.95,
        fields=fields,
        flags=[],
    )


def test_autorisation_matches_exact(reco_mod, engine_no_pipeline):
    """A spaced reference matches the same reference written without spaces."""
    candidates = [_doc(reco_mod, ref="PC 044 035 25 00035")]
    outcome = engine_no_pipeline._autorisation_matches(
        "PC0440352500035", candidates)
    assert outcome is True


def test_autorisation_matches_with_ocr_drift(reco_mod, engine_no_pipeline):
    """One missing digit (PC0330632 vs PC03306323) should still match."""
    candidates = [_doc(reco_mod, ref="PC0330632Z0475")]
    outcome = engine_no_pipeline._autorisation_matches(
        "PC03306323Z0475", candidates)
    assert outcome is True


def test_autorisation_matches_with_glyph_fold(reco_mod, engine_no_pipeline):
    """OCR misread of digit `0` as letter `O` — O↔0 fold should rescue."""
    candidates = [_doc(reco_mod, ref="PC 056 260 22 YOO65")]
    outcome = engine_no_pipeline._autorisation_matches(
        "PC05626022Y0065", candidates)
    assert outcome is True


def test_autorisation_matches_false_when_clearly_different(
        reco_mod, engine_no_pipeline):
    """A clearly different reference yields False."""
    candidates = [_doc(reco_mod, ref="PC 999 999 99 99999")]
    outcome = engine_no_pipeline._autorisation_matches(
        "PC0440352500035", candidates)
    assert outcome is False


def test_autorisation_matches_none_when_no_readable_ref(
        reco_mod, engine_no_pipeline):
    """An autorisation without an extractable reference yields None.

    None (not False) lets the engine route to manual_review rather than
    crying "incohérent".
    """
    candidates = [_doc(reco_mod)]  # no ref field
    outcome = engine_no_pipeline._autorisation_matches(
        "PC0440352500035", candidates)
    assert outcome is None


def test_autorisation_matches_empty_fiche_ref(reco_mod, engine_no_pipeline):
    """If we can't compare (fiche ref also empty), don't flag — return True."""
    candidates = [_doc(reco_mod, ref="PC0440352500035")]
    outcome = engine_no_pipeline._autorisation_matches("", candidates)
    assert outcome is True


# ──────────────────────────────────────────────────────────────────────────
# _filename_class_hint
# ──────────────────────────────────────────────────────────────────────────

@pytest.mark.parametrize("fname, expected", [
    ("PF0442_Plan-de-situation_PAR-1-1.pdf", "PlanSituation"),
    ("PF0442_Plan-de-masse_PAR-1-1.pdf", "PlanMasse"),
    ("PF0442_Fiche-de-renseignement_1.pdf", "fiche"),
    ("PF0442_Autorisation-d-urbanisme_1.pdf", "Autorisation"),
    ("PF0442_Certificat-d-adressage_1.pdf", "Certificat"),
    ("PF0442_Mandat_PAR-1-1.pdf", "Mandat"),
    # Alternate naming we added
    ("0335502500011 ARRETE PC.jpg", "Autorisation"),
    ("0335502500011 CERTIFICAT ADRESSAGE.jpg", "Certificat"),
    ("0335502500011 PLAN DE MASSE.jpg", "PlanMasse"),
    ("0335502500011 PLAN DE SITUATION.jpg", "PlanSituation"),
    ("0821212500015 ATTESTATION CONFORMITE.pdf", "Autorisation"),
    ("ADRESSAGE.jpg", "Certificat"),
    # Unknowns
    ("random_doc.pdf", None),
    ("20260202_1232_MONTPELLIER.pdf", None),
])
def test_filename_hint(engine_no_pipeline, fname, expected):
    """`_filename_class_hint` returns the expected class (or None) per name."""
    hint = engine_no_pipeline._filename_class_hint(fname)
    assert hint == expected


# ──────────────────────────────────────────────────────────────────────────
# _is_out_of_scope_file
# ──────────────────────────────────────────────────────────────────────────

@pytest.mark.parametrize("fname, expected", [
    ("PF0442_PV-Loc-PAR_PAR-2-1_1.pdf", True),
    ("PF0850_Plan-et-ou-photo-du-PAR-souhaite_PAR-2-1_1.pdf", True),
    ("PF0442_Autre_1.pdf", True),
    ("PF0442_Autre_PAR-1-1_1.png", True),  # the \b fix
    ("PF0335_Autre_3 (1).pdf", True),
    # negatives
    ("PF0442_Autorisation-d-urbanisme.pdf", False),
    ("PF0442_Plan-de-masse_PAR-1-1.pdf", False),
    ("PF0442_Fiche-de-renseignement.pdf", False),
])
def test_is_out_of_scope_file(engine_no_pipeline, fname, expected):
    """`_is_out_of_scope_file` returns exactly True/False for each name."""
    out_of_scope = engine_no_pipeline._is_out_of_scope_file(fname)
    assert out_of_scope is expected


# ──────────────────────────────────────────────────────────────────────────
# _is_recolement_dossier — short-circuit for post-installation packages
# ──────────────────────────────────────────────────────────────────────────

def test_recolement_detected(engine_no_pipeline):
    """A dossier containing `RECOLLEMENT.pdf` is detected as récolement."""
    filenames = ["RECOLLEMENT.pdf", "0821 ATTESTATION CONFORMITE.pdf"]
    assert engine_no_pipeline._is_recolement_dossier(filenames) is True


def test_recolement_accent(engine_no_pipeline):
    """The accented spelling `récolement` is detected as well."""
    filenames = ["dossier_de_récolement.pdf"]
    assert engine_no_pipeline._is_recolement_dossier(filenames) is True


def test_recolement_not_detected_for_normal_demande(engine_no_pipeline):
    """Ordinary demande file names are not treated as a récolement dossier."""
    filenames = [
        "PF0442_Fiche-de-renseignement.pdf",
        "PF0442_Autorisation-d-urbanisme.pdf",
        "PF0442_Plan-de-masse.pdf",
    ]
    assert engine_no_pipeline._is_recolement_dossier(filenames) is False


# ──────────────────────────────────────────────────────────────────────────
# Build verdict from synthetic Documents — the core rule engine logic
# ──────────────────────────────────────────────────────────────────────────

def _make_doc(reco_mod, file, cls, conf=0.95, fields=None, flags=None):
    """Build a `DocumentSummary`, replacing falsy `fields`/`flags` by empties."""
    summary_kwargs = {
        "file": file,
        "doc_class": cls,
        "doc_confidence": conf,
        "fields": fields if fields else {},
        "flags": flags if flags else [],
    }
    return reco_mod.DocumentSummary(**summary_kwargs)


def test_build_verdict_complete(reco_mod, engine_no_pipeline):
    """Fiche, matching autorisation, both plans and a mandat → "complète"."""
    fiche_fields = {
        "Reference_Urbanisme": _field("PC0440352500035"),
        "DLPI": _field("01/09/2026", 0.98),
        "Disposition_Mandat": _field("OUI"),
        "nb_log_totale": _field("5", 0.70),
    }
    auto_fields = {"Reference_Urbanisme": _field("PC 044 035 25 00035")}
    dossier = [
        _make_doc(reco_mod, "fiche.pdf", "fiche", fields=fiche_fields),
        _make_doc(reco_mod, "auto.pdf", "Autorisation", fields=auto_fields),
        _make_doc(reco_mod, "plan_masse.pdf", "PlanMasse"),
        _make_doc(reco_mod, "plan_situation.pdf", "PlanSituation"),
        _make_doc(reco_mod, "mandat.pdf", "Mandat"),
    ]

    verdict = engine_no_pipeline._build_verdict(dossier)

    assert verdict.status == "complète"
    assert verdict.missing_documents == []
    assert verdict.incomplete_documents == []


def test_build_verdict_missing_fiche(reco_mod, engine_no_pipeline):
    """Without a fiche the verdict is "incomplète" and names the fiche."""
    dossier = [
        _make_doc(reco_mod, "auto.pdf", "Autorisation"),
        _make_doc(reco_mod, "plan_masse.pdf", "PlanMasse"),
        _make_doc(reco_mod, "plan_sit.pdf", "PlanSituation"),
    ]

    verdict = engine_no_pipeline._build_verdict(dossier)

    assert verdict.status == "incomplète"
    assert any("fiche" in entry.lower() for entry in verdict.missing_documents)


def test_build_verdict_unreadable_auto_routes_to_manual_review(
        reco_mod, engine_no_pipeline):
    """Fiche has a ref, autorisation present but no readable ref → manual_review."""
    fiche_fields = {
        "Reference_Urbanisme": _field("PC2221525Q0037"),
        "DLPI": _field("01/09/2026", 0.98),
        "nb_log_totale": _field("1", 0.70),
    }
    dossier = [
        _make_doc(reco_mod, "fiche.pdf", "fiche", fields=fiche_fields),
        # no Reference_Urbanisme extracted
        _make_doc(reco_mod, "auto.jpg", "Autorisation"),
        _make_doc(reco_mod, "plan_masse.pdf", "PlanMasse"),
        _make_doc(reco_mod, "plan_situation.pdf", "PlanSituation"),
    ]

    verdict = engine_no_pipeline._build_verdict(dossier)

    # Should NOT be flagged "incohérent"
    assert all(
        "incohérent" not in entry.lower()
        for entry in verdict.incomplete_documents
    )
    # Should appear in manual_review with the "n'a pas pu être lu" phrasing
    assert any(
        "n'a pas pu être lu" in entry
        for entry in verdict.manual_review_documents
    )


def test_build_verdict_recolement_short_circuit(reco_mod, engine_no_pipeline):
    """A récolement dossier gets "hors-périmètre" and skips the regular rules."""
    dossier = [
        _make_doc(reco_mod, "ATTESTATION CONFORMITE.pdf", "Autorisation"),
        _make_doc(reco_mod, "TRANCHEE FERMEE.jpg", "PlanSituation"),
        _make_doc(reco_mod, "RECOLLEMENT.pdf", "Certificat"),
    ]

    verdict = engine_no_pipeline._build_verdict(dossier)

    assert verdict.status == "hors-périmètre"
    assert any(
        "récolement" in entry.lower()
        for entry in verdict.manual_review_documents
    )
    # Should bypass the regular rules — no "missing fiche" etc.
    assert verdict.missing_documents == []
    assert verdict.incomplete_documents == []


def test_build_verdict_out_of_scope_excluded_from_class_count(
        reco_mod, engine_no_pipeline):
    """An out-of-scope file must not count towards its predicted class.

    A PV-Loc-PAR classified as PlanMasse should NOT satisfy the
    'Plan de masse manquant' rule — the out_of_scope_document flag excludes
    it from class counting.
    """
    fiche_fields = {
        "Reference_Urbanisme": _field("PC0440352500035"),
        "DLPI": _field("01/09/2026", 0.98),
        "nb_log_totale": _field("1", 0.70),
    }
    auto_fields = {"Reference_Urbanisme": _field("PC0440352500035")}
    dossier = [
        _make_doc(reco_mod, "fiche.pdf", "fiche", fields=fiche_fields),
        _make_doc(reco_mod, "auto.pdf", "Autorisation", fields=auto_fields),
        # the only "plan masse"
        _make_doc(reco_mod, "PV-Loc-PAR.pdf", "PlanMasse",
                  flags=["out_of_scope_document"]),
        _make_doc(reco_mod, "plan_situation.pdf", "PlanSituation"),
    ]

    verdict = engine_no_pipeline._build_verdict(dossier)

    assert verdict.status == "incomplète"
    assert any(
        "plan de masse" in entry.lower()
        for entry in verdict.missing_documents
    )


def test_build_verdict_disposition_mandat_undetermined_to_manual_review(
        reco_mod, engine_no_pipeline):
    """Unreadable Disposition_Mandat with no Mandat doc goes to manual review.

    The expected outcome is a manual_review entry, NOT 'Mandat manquant' in
    missing_documents.
    """
    fiche_fields = {
        "Reference_Urbanisme": _field("PC0440352500035"),
        "DLPI": _field("01/09/2026", 0.98),
        "nb_log_totale": _field("1", 0.70),
        # No Disposition_Mandat key — undetermined
    }
    auto_fields = {"Reference_Urbanisme": _field("PC0440352500035")}
    dossier = [
        _make_doc(reco_mod, "fiche.pdf", "fiche", fields=fiche_fields),
        _make_doc(reco_mod, "auto.pdf", "Autorisation", fields=auto_fields),
        _make_doc(reco_mod, "plan_masse.pdf", "PlanMasse"),
        _make_doc(reco_mod, "plan_situation.pdf", "PlanSituation"),
    ]

    verdict = engine_no_pipeline._build_verdict(dossier)

    assert all(
        "mandat" not in entry.lower()
        for entry in verdict.missing_documents
    )
    assert any("Mandat" in entry for entry in verdict.manual_review_documents)