"""Guard for the recommendation-card EXTRACTION GATE (#29). THE BUG (from a real production screenshot): A recommended card rendered with the raw policy_id slug as its title ("manipalcigna__sarv…"), grade "N/A", body "No extraction available for this policy.", and "Why this fits you: Data not indexed". ROOT CAUSE: `_scorecard_signal` / `_quality_seed_candidates` grade off the ~790-entry CURATED layer, but the card UI renders from the EXTRACTED layer (settings.EXTRACTED_DIR/*.json — the same set the marketplace shows). Quality-seed injected curated-graded-but-not-extracted policies into the candidate pool, so the LLM could recommend a policy whose card cannot render. THE CONTRACT THIS PINS: A policy with no extracted corpus file is NEVER quality-seeded and is ALWAYS dropped from the cited set, even if the LLM explicitly marks it — so a broken "N/A / No extraction available" card can never reach the UI. This file deliberately uses the REAL `_has_extraction` predicate (the package-wide conftest autouse fixture stubs it True for the logic tests; here we restore the real one so the gate itself is exercised). """ from __future__ import annotations import sys from pathlib import Path import pytest _REPO_ROOT = Path(__file__).resolve().parent.parent if str(_REPO_ROOT) not in sys.path: sys.path.insert(0, str(_REPO_ROOT)) from backend import brain_tools # noqa: E402 from backend.brain_tools import ( # noqa: E402 _has_extraction as _REAL_HAS_EXTRACTION, _quality_seed_candidates, ) from backend.config import settings # noqa: E402 from backend.single_brain import _build_recommendation_citations # noqa: E402 def _a_real_extracted_stem() -> str: """Any policy_id that genuinely has an extracted corpus file on disk.""" files = sorted(settings.EXTRACTED_DIR.glob("*.json")) assert files, "no extracted corpus files — cannot test the gate" return files[0].stem @pytest.fixture def real_extraction(monkeypatch): """Override the conftest autouse stub: use the REAL predicate so the gate's actual on-disk behaviour is what gets exercised here.""" monkeypatch.setattr(brain_tools, "_has_extraction", _REAL_HAS_EXTRACTION) brain_tools._extraction_cache.clear() brain_tools._qseed_cache.clear() return _REAL_HAS_EXTRACTION def test_predicate_true_for_extracted_false_for_missing(real_extraction): real = _a_real_extracted_stem() assert brain_tools._has_extraction(real) is True assert ( brain_tools._has_extraction("definitely__not-a-real-policy-xyz") is False ) assert brain_tools._has_extraction("") is False def test_non_extracted_policy_never_cited_even_when_marked(real_extraction): """The exact production failure: a marked policy with no extracted corpus must be DROPPED, not rendered as an N/A card.""" real = _a_real_extracted_stem() chunks = [ { "chunk_id": "real1", "policy_id": real, "policy_name": "Real Extracted Plan", "insurer_slug": real.split("__", 1)[0] if "__" in real else "x", "doc_type": "policy", "source_url": f"https://example.com/{real}.pdf", "score": 0.9, }, { "chunk_id": "ghost1", "policy_id": "manipalcigna__sarvah-param-NOT-EXTRACTED", "policy_name": "Ghost Plan", "insurer_slug": "manipalcigna", "doc_type": "policy", "source_url": "", "score": 0.95, # higher score — must STILL be dropped }, ] cites, is_rec = _build_recommendation_citations( reply_text="See Real Extracted Plan and Ghost Plan.", retrieved_chunks_all=chunks, marked_policy_ids=[ "manipalcigna__sarvah-param-NOT-EXTRACTED", real, ], ) assert is_rec is True ids = [c["policy_id"] for c in cites] assert "manipalcigna__sarvah-param-NOT-EXTRACTED" not in ids assert ids == [real] def test_quality_seed_only_emits_renderable_policies(real_extraction): """Every quality-seeded candidate must have an extracted file — so it can never inject a policy whose card renders as N/A.""" seeded = _quality_seed_candidates(profile=None, limit=25) assert seeded, "quality-seed returned nothing — basket starved" offenders = [ c["policy_id"] for c in seeded if not _REAL_HAS_EXTRACTION(c.get("policy_id") or "") ] assert not offenders, ( f"quality-seed emitted non-renderable policies: {offenders}" ) if __name__ == "__main__": raise SystemExit(pytest.main([__file__, "-v"]))