"""Task #31 — deterministic, profile-aware {strengths, caveat} guard. WHAT THIS LOCKS DOWN -------------------- `backend.scorecard.build_profile_summary` replaces the generic grade one-liner with a PER (profile × policy) list of concrete strengths + the single most grade-capping trade-off, computed on the SAME pass as the grade. The non-negotiable invariants this test pins for the FULL 148-policy catalogue, profile-neutral AND across 6 representative profiles: 1. Shape: 0 ≤ len(strengths) ≤ 5. ≥3 whenever ≥3 qualifying facts exist; fewer ONLY when the policy genuinely has fewer real facts (never padded). insufficient-data ⇒ strengths == [] (caller falls back to one_liner). 2. No junk: no empty bullet, no "/100", no standalone letter grade, no literal "null"/"None". 3. Deductible bullet present IFF premium_calculator.policy_deductible_support(pid)[0] is True — across the catalogue that is EXACTLY {bajaj-allianz__health-guard, star-health__star-assure} (BUG #29 invariant). 4. Non-fabrication: every numeric token in every strength is regex- extractable AND traces to a value the scorecard's OWN helpers can read off that policy / insurer_reviews (a strength can never assert a number the grade didn't see). 5. Caveat is None OR derived from a "− "-prefixed (U+2212 + space) signal literally present in that scorecard's sub.signals (never invented / contradicting). 6. Maternity strength absent when no spouse/partner on the profile. 7. Determinism: same (policy, profile) twice ⇒ byte-identical. """ from __future__ import annotations import json import re import sys from pathlib import Path import pytest _REPO_ROOT = Path(__file__).resolve().parent.parent if str(_REPO_ROOT) not in sys.path: sys.path.insert(0, str(_REPO_ROOT)) import backend.main as M # noqa: E402 from backend.config import settings # noqa: E402 from backend.premium_calculator import policy_deductible_support # noqa: E402 from backend.scorecard import ( # noqa: E402 _profile_tuned_weights, build_profile_summary, build_scorecard, ) # The exact catalogue-wide deductible-supporting set (BUG #29). Anything else # claiming a voluntary-deductible strength is a fabrication. _DEDUCTIBLE_POLICIES = {"bajaj-allianz__health-guard", "star-health__star-assure"} # 6 representative profiles spanning the weight-tuner's branches. _PROFILES = [ None, { # young, healthy, first-time, tax goal, metro "age": 28, "dependents": "self", "income_band": "10L-25L", "primary_goal": "tax_planning", "location_tier": "metro", "health_conditions": [], "existing_cover_inr": 0, "copay_pct": 0, }, { # senior + spouse + diabetes + family history + has cover "age": 58, "dependents": "self+spouse", "income_band": "25L+", "primary_goal": "upgrade", "location_tier": "metro", "health_conditions": ["diabetes"], "family_medical_history": ["heart"], "existing_cover_inr": 500000, "parents_to_insure": False, }, { # family with kids, tier3, budget-tight "age": 41, "dependents": "self+spouse+kids", "income_band": "5L-10L", "primary_goal": "first_buy", "location_tier": "tier3", "health_conditions": [], "budget_band": "under_15k", "existing_cover_inr": 0, }, { # parents-to-insure, parents have PED "age": 35, "dependents": "self+parents", "income_band": "10L-25L", "primary_goal": "compare_specific", "location_tier": "tier2", "health_conditions": [], "parents_to_insure": True, "parents_age_max": 68, "parents_has_ped": True, }, { # mid, hypertension, copay tolerance stated "age": 47, "dependents": "self+spouse", "income_band": "25L+", "primary_goal": "upgrade", "location_tier": "metro", "health_conditions": ["hypertension"], "copay_pct": 0, "existing_cover_inr": 1000000, }, ] _NUM_RE = re.compile(r"\d[\d,]*(?:\.\d+)?") _GRADE_RE = re.compile(r"\b[ABCDF]\b") def _catalogue_ids() -> list[str]: cards = M._marketplace_catalogue(None) assert len(cards) > 100, f"catalogue collapsed: {len(cards)}" return [c.policy_id for c in cards] def _resolve_policy(pid: str) -> dict | None: """Resolve a card id → policy dict EXACTLY like /api/policies/{id}/scorecard.""" cur = M._load_curated_facts() ep = settings.EXTRACTED_DIR / f"{pid}.json" if ep.exists(): try: policy = json.loads(ep.read_text()) except Exception: return None return M._merge_curated( policy, cur.get(policy.get("policy_id", pid)) or cur.get(pid) ) policy = ( cur.get(pid) or cur.get(f"{pid}__wordings") or cur.get(f"{pid}__cis") or cur.get(f"{pid}__brochure") or cur.get(f"{pid}__prospectus") ) if not policy: return None policy = dict(policy) policy.setdefault("policy_id", pid) return policy def _insurer_reviews(policy: dict) -> dict | None: slug = policy.get("insurer_slug") if not slug: return None rp = settings.DATA_DIR / "reviews" / f"{slug}.json" if rp.exists(): try: return json.loads(rp.read_text()) except Exception: return None return None def _readable_numbers(policy: dict, sc, reviews: dict | None) -> set[str]: """Every integer/float a strength could LEGITIMATELY quote — each read via the SAME scorecard helper (`_int` / `_pick_alias`) the generator uses to build that strength, PLUS the insurer CSR/year. The non-fabrication anchor: a strength may only state a number this set proves the scorecard itself can read off this policy. A token outside this set is a fabrication.""" from backend.scorecard import _int as _sc_int ok: set[str] = set() def _add(v): if v is None: return ok.add(str(v).replace(",", "")) try: ok.add(str(int(float(v)))) ok.add(f"{float(v):.1f}") except (TypeError, ValueError): pass # Numbers the scorecard surfaced in its own signals (helper-read). for s in sc.sub_scores: for sig in s.signals: for m in _NUM_RE.findall(sig): ok.add(m.replace(",", "")) # Exactly the fields each strength reads via _int, read the SAME way. for fld in ( "deductible_amount", "max_entry_age", "pre_existing_disease_waiting_months", "maternity_waiting_months", "network_hospital_count", "no_claim_bonus_pct", "copayment_pct", ): _add(_sc_int(policy, fld)) # Insurer CSR (+ year) for the high-CSR strength. if reviews: cm = reviews.get("claim_metrics", {}) or {} _add(cm.get("claim_settlement_ratio_pct")) yr = str(cm.get("claim_settlement_ratio_year", "") or "") for m in _NUM_RE.findall(yr): ok.add(m.replace(",", "")) return ok @pytest.mark.parametrize("profile", _PROFILES, ids=lambda p: "neutral" if p is None else f"{p.get('age')}/{p.get('dependents')}") def test_full_catalogue_profile_summary_invariants(profile): """Every catalogued policy, this profile: shape + no-junk + deductible gate + non-fabrication + maternity-suppression + determinism.""" junk_fails: list[str] = [] shape_fails: list[str] = [] ded_fails: list[str] = [] fab_fails: list[str] = [] mat_fails: list[str] = [] det_fails: list[str] = [] caveat_fails: list[str] = [] has_spouse = bool( profile and any( k in str(profile.get("dependents") or "").lower() for k in ("spouse", "wife", "husband", "partner") ) ) for pid in _catalogue_ids(): policy = _resolve_policy(pid) if policy is None: continue reviews = _insurer_reviews(policy) sc = build_scorecard(policy, insurer_reviews=reviews, profile=profile) ps = sc.profile_summary assert ps is not None, f"{pid}: profile_summary is None" if sc.insufficient_data: if ps.strengths != [] or ps.caveat is not None: shape_fails.append(f"{pid}: insufficient ⇒ must be empty, got {ps}") continue # 1. shape if not (0 <= len(ps.strengths) <= 5): shape_fails.append(f"{pid}: {len(ps.strengths)} strengths (must be 0..5)") # 2. no junk for b in ps.strengths + ([ps.caveat] if ps.caveat else []): if not b or not b.strip(): junk_fails.append(f"{pid}: empty bullet") if "/100" in b: junk_fails.append(f"{pid}: '/100' in {b!r}") if b.strip() in {"null", "None"}: junk_fails.append(f"{pid}: literal null/None {b!r}") # A bare standalone grade letter as a whole bullet. if _GRADE_RE.fullmatch(b.strip()): junk_fails.append(f"{pid}: standalone grade letter {b!r}") # 3. deductible gate ded_strength = any( "voluntary deductible" in s.lower() for s in ps.strengths ) ded_ok = policy_deductible_support(pid)[0] is True if ded_strength != ded_ok: ded_fails.append( f"{pid}: deductible strength={ded_strength} but support={ded_ok}" ) # 4. non-fabrication — every numeric token traces to a readable value readable = _readable_numbers(policy, sc, reviews) for s in ps.strengths: for tok in _NUM_RE.findall(s): norm = tok.replace(",", "") # tolerate "92.2" matching a stored 92.2 / 92 / "92.2" cands = {norm, norm.split(".")[0]} try: cands.add(f"{float(norm):.1f}") cands.add(str(int(float(norm)))) except ValueError: pass if not (cands & readable): fab_fails.append( f"{pid}: numeric {tok!r} in strength {s!r} not " f"readable from policy/reviews" ) # 5. caveat ⟺ the generator's EXACT contract: pick the most # grade-capping, profile-relevant sub via the SAME profile-tuned # weights (argmax weights[name]*(100-score)); its FIRST "− "-prefixed # signal must exist iff the caveat is non-None, and the caveat must # be a deterministic transform that carries that signal's stripped # text OR its numeric token (never invented / contradicting). weights = _profile_tuned_weights(profile) def _gap(sub): return weights.get(sub.name, 0.0) * (100 - sub.score) top = max(sc.sub_scores, key=_gap) if sc.sub_scores else None first_neg = None if top is not None: first_neg = next( (sig for sig in (top.signals or []) if sig.startswith("− ")), None, ) if first_neg is None: if ps.caveat is not None: caveat_fails.append( f"{pid}: caveat {ps.caveat!r} but top sub " f"{getattr(top, 'name', None)!r} has NO '− ' signal" ) else: if ps.caveat is None: caveat_fails.append( f"{pid}: top sub has '− ' signal {first_neg!r} but " f"caveat is None" ) else: raw = first_neg[2:].strip() nums = _NUM_RE.findall(raw) # The caveat is a deterministic plain-language transform of # exactly THIS signal: it must carry the signal's number(s) # OR a stable keyword from the signal — never unrelated text. kws = [ "ped", "copay", "co-pay", "room rent", "csr", "claim settlement", "cashless", "network", "initial", "maternity", "deductible", "day-care", "day care", ] rl = raw.lower() cl = ps.caveat.lower() traced = ( (nums and all(n in ps.caveat for n in nums)) or any(k in rl and k in cl for k in kws) or raw in ps.caveat # generic "One trade-off: ." ) if not traced: caveat_fails.append( f"{pid}: caveat {ps.caveat!r} not a transform of the " f"top sub's first '− ' signal {first_neg!r}" ) # 6. maternity suppressed without spouse/partner if not has_spouse: if any("maternity" in s.lower() for s in ps.strengths): mat_fails.append(f"{pid}: maternity strength without spouse") # 7. determinism sc2 = build_scorecard(policy, insurer_reviews=reviews, profile=profile) if ( sc2.profile_summary.strengths != ps.strengths or sc2.profile_summary.caveat != ps.caveat ): det_fails.append(f"{pid}: non-deterministic profile_summary") assert not shape_fails, f"SHAPE ({len(shape_fails)}): {shape_fails[:8]}" assert not junk_fails, f"JUNK ({len(junk_fails)}): {junk_fails[:8]}" assert not ded_fails, f"DEDUCTIBLE GATE ({len(ded_fails)}): {ded_fails[:8]}" assert not fab_fails, f"FABRICATION ({len(fab_fails)}): {fab_fails[:8]}" assert not caveat_fails, f"CAVEAT ({len(caveat_fails)}): {caveat_fails[:8]}" assert not mat_fails, f"MATERNITY ({len(mat_fails)}): {mat_fails[:8]}" assert not det_fails, f"DETERMINISM ({len(det_fails)}): {det_fails[:8]}" def test_deductible_strength_is_exactly_the_two_known_policies(): """The voluntary-deductible strength appears for EXACTLY {bajaj-allianz__health-guard, star-health__star-assure} and no other catalogued policy (the BUG #29 catalogue-wide invariant).""" seen: set[str] = set() for pid in _catalogue_ids(): policy = _resolve_policy(pid) if policy is None: continue sc = build_scorecard( policy, insurer_reviews=_insurer_reviews(policy), profile=None ) ps = sc.profile_summary if ps and any("voluntary deductible" in s.lower() for s in ps.strengths): seen.add(pid) assert seen == _DEDUCTIBLE_POLICIES, ( f"deductible strength leaked / missing: got {sorted(seen)}, " f"expected {sorted(_DEDUCTIBLE_POLICIES)}" ) def test_maternity_appears_when_spouse_present_and_policy_covers_it(): """Positive control for the maternity-suppression rule: with a spouse on the profile, at least one maternity-covering policy DOES surface a maternity strength (so the rule isn't just always-off).""" spouse_profile = { "age": 32, "dependents": "self+spouse", "income_band": "10L-25L", "primary_goal": "first_buy", "location_tier": "metro", "health_conditions": [], } found = False for pid in _catalogue_ids(): policy = _resolve_policy(pid) if policy is None: continue sc = build_scorecard( policy, insurer_reviews=_insurer_reviews(policy), profile=spouse_profile, ) ps = sc.profile_summary if ps and any("maternity" in s.lower() for s in ps.strengths): found = True break assert found, ( "no maternity strength surfaced for ANY policy even with a spouse " "on the profile — suppression rule is stuck off" ) def test_insufficient_data_yields_empty_profile_summary(): """A genuinely-bare policy takes the defined honest path and the profile_summary is the empty form (caller falls back to one_liner).""" sc = build_scorecard( {"policy_id": "x__bare", "policy_name": "Bare", "insurer_slug": "x"} ) assert sc.insufficient_data is True assert sc.profile_summary is not None assert sc.profile_summary.strengths == [] assert sc.profile_summary.caveat is None def test_copay_preference_tag_only_when_user_states_zero_copay(): """The '(your stated preference)' qualifier on the zero-co-pay strength appears IFF the profile carries copay_pct == 0 — deterministic, derived, never fabricated.""" # bajaj-allianz__health-guard has copayment_pct == 0 in curated facts. policy = _resolve_policy("star-health__star-assure") assert policy is not None rv = _insurer_reviews(policy) no_pref = build_scorecard(policy, insurer_reviews=rv, profile={ "age": 30, "dependents": "self", "income_band": "10L-25L", "primary_goal": "first_buy", "location_tier": "metro", "health_conditions": [], }).profile_summary with_pref = build_scorecard(policy, insurer_reviews=rv, profile={ "age": 30, "dependents": "self", "income_band": "10L-25L", "primary_goal": "first_buy", "location_tier": "metro", "health_conditions": [], "copay_pct": 0, }).profile_summary zc_no = [s for s in no_pref.strengths if "co-payment" in s.lower()] zc_yes = [s for s in with_pref.strengths if "co-payment" in s.lower()] # star-assure has copayment_pct of 10 (mandatory) — no zero-copay # strength either way; assert the qualifier never appears WITHOUT the # preference, and that the tag string is exclusively preference-gated. assert all("your stated preference" not in s for s in zc_no), zc_no for s in with_pref.strengths: if "your stated preference" in s: assert "co-payment" in s.lower() if __name__ == "__main__": raise SystemExit(pytest.main([__file__, "-rA", "-p", "no:warnings"]))