Spaces:
Sleeping
Sleeping
| """Task #31 β deterministic, profile-aware {strengths, caveat} guard. | |
| WHAT THIS LOCKS DOWN | |
| -------------------- | |
| `backend.scorecard.build_profile_summary` replaces the generic grade | |
| one-liner with a PER (profile Γ policy) list of concrete strengths + the | |
| single most grade-capping trade-off, computed on the SAME pass as the grade. | |
| The non-negotiable invariants this test pins for the FULL 148-policy | |
| catalogue, profile-neutral AND across 6 representative profiles: | |
| 1. Shape: 0 β€ len(strengths) β€ 5. β₯3 whenever β₯3 qualifying facts exist; | |
| fewer ONLY when the policy genuinely has fewer real facts (never padded). | |
| insufficient-data β strengths == [] (caller falls back to one_liner). | |
| 2. No junk: no empty bullet, no "/100", no standalone letter grade, no | |
| literal "null"/"None". | |
| 3. Deductible bullet present IFF | |
| premium_calculator.policy_deductible_support(pid)[0] is True β across | |
| the catalogue that is EXACTLY {bajaj-allianz__health-guard, | |
| star-health__star-assure} (BUG #29 invariant). | |
| 4. Non-fabrication: every numeric token in every strength is regex- | |
| extractable AND traces to a value the scorecard's OWN helpers can read | |
| off that policy / insurer_reviews (a strength can never assert a number | |
| the grade didn't see). | |
| 5. Caveat is None OR derived from a "β "-prefixed (U+2212 + space) signal | |
| literally present in that scorecard's sub.signals (never invented / | |
| contradicting). | |
| 6. Maternity strength absent when no spouse/partner on the profile. | |
| 7. Determinism: same (policy, profile) twice β byte-identical. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import re | |
| import sys | |
| from pathlib import Path | |
| import pytest | |
| _REPO_ROOT = Path(__file__).resolve().parent.parent | |
| if str(_REPO_ROOT) not in sys.path: | |
| sys.path.insert(0, str(_REPO_ROOT)) | |
| import backend.main as M # noqa: E402 | |
| from backend.config import settings # noqa: E402 | |
| from backend.premium_calculator import policy_deductible_support # noqa: E402 | |
| from backend.scorecard import ( # noqa: E402 | |
| _profile_tuned_weights, | |
| build_profile_summary, | |
| build_scorecard, | |
| ) | |
| # The exact catalogue-wide deductible-supporting set (BUG #29). Anything else | |
| # claiming a voluntary-deductible strength is a fabrication. | |
| _DEDUCTIBLE_POLICIES = {"bajaj-allianz__health-guard", "star-health__star-assure"} | |
| # 6 representative profiles spanning the weight-tuner's branches. | |
| _PROFILES = [ | |
| None, | |
| { # young, healthy, first-time, tax goal, metro | |
| "age": 28, "dependents": "self", "income_band": "10L-25L", | |
| "primary_goal": "tax_planning", "location_tier": "metro", | |
| "health_conditions": [], "existing_cover_inr": 0, "copay_pct": 0, | |
| }, | |
| { # senior + spouse + diabetes + family history + has cover | |
| "age": 58, "dependents": "self+spouse", "income_band": "25L+", | |
| "primary_goal": "upgrade", "location_tier": "metro", | |
| "health_conditions": ["diabetes"], "family_medical_history": ["heart"], | |
| "existing_cover_inr": 500000, "parents_to_insure": False, | |
| }, | |
| { # family with kids, tier3, budget-tight | |
| "age": 41, "dependents": "self+spouse+kids", "income_band": "5L-10L", | |
| "primary_goal": "first_buy", "location_tier": "tier3", | |
| "health_conditions": [], "budget_band": "under_15k", | |
| "existing_cover_inr": 0, | |
| }, | |
| { # parents-to-insure, parents have PED | |
| "age": 35, "dependents": "self+parents", "income_band": "10L-25L", | |
| "primary_goal": "compare_specific", "location_tier": "tier2", | |
| "health_conditions": [], "parents_to_insure": True, | |
| "parents_age_max": 68, "parents_has_ped": True, | |
| }, | |
| { # mid, hypertension, copay tolerance stated | |
| "age": 47, "dependents": "self+spouse", "income_band": "25L+", | |
| "primary_goal": "upgrade", "location_tier": "metro", | |
| "health_conditions": ["hypertension"], "copay_pct": 0, | |
| "existing_cover_inr": 1000000, | |
| }, | |
| ] | |
| _NUM_RE = re.compile(r"\d[\d,]*(?:\.\d+)?") | |
| _GRADE_RE = re.compile(r"\b[ABCDF]\b") | |
| def _catalogue_ids() -> list[str]: | |
| cards = M._marketplace_catalogue(None) | |
| assert len(cards) > 100, f"catalogue collapsed: {len(cards)}" | |
| return [c.policy_id for c in cards] | |
| def _resolve_policy(pid: str) -> dict | None: | |
| """Resolve a card id β policy dict EXACTLY like /api/policies/{id}/scorecard.""" | |
| cur = M._load_curated_facts() | |
| ep = settings.EXTRACTED_DIR / f"{pid}.json" | |
| if ep.exists(): | |
| try: | |
| policy = json.loads(ep.read_text()) | |
| except Exception: | |
| return None | |
| return M._merge_curated( | |
| policy, cur.get(policy.get("policy_id", pid)) or cur.get(pid) | |
| ) | |
| policy = ( | |
| cur.get(pid) | |
| or cur.get(f"{pid}__wordings") | |
| or cur.get(f"{pid}__cis") | |
| or cur.get(f"{pid}__brochure") | |
| or cur.get(f"{pid}__prospectus") | |
| ) | |
| if not policy: | |
| return None | |
| policy = dict(policy) | |
| policy.setdefault("policy_id", pid) | |
| return policy | |
| def _insurer_reviews(policy: dict) -> dict | None: | |
| slug = policy.get("insurer_slug") | |
| if not slug: | |
| return None | |
| rp = settings.DATA_DIR / "reviews" / f"{slug}.json" | |
| if rp.exists(): | |
| try: | |
| return json.loads(rp.read_text()) | |
| except Exception: | |
| return None | |
| return None | |
| def _readable_numbers(policy: dict, sc, reviews: dict | None) -> set[str]: | |
| """Every integer/float a strength could LEGITIMATELY quote β each read | |
| via the SAME scorecard helper (`_int` / `_pick_alias`) the generator uses | |
| to build that strength, PLUS the insurer CSR/year. The non-fabrication | |
| anchor: a strength may only state a number this set proves the scorecard | |
| itself can read off this policy. A token outside this set is a | |
| fabrication.""" | |
| from backend.scorecard import _int as _sc_int | |
| ok: set[str] = set() | |
| def _add(v): | |
| if v is None: | |
| return | |
| ok.add(str(v).replace(",", "")) | |
| try: | |
| ok.add(str(int(float(v)))) | |
| ok.add(f"{float(v):.1f}") | |
| except (TypeError, ValueError): | |
| pass | |
| # Numbers the scorecard surfaced in its own signals (helper-read). | |
| for s in sc.sub_scores: | |
| for sig in s.signals: | |
| for m in _NUM_RE.findall(sig): | |
| ok.add(m.replace(",", "")) | |
| # Exactly the fields each strength reads via _int, read the SAME way. | |
| for fld in ( | |
| "deductible_amount", | |
| "max_entry_age", | |
| "pre_existing_disease_waiting_months", | |
| "maternity_waiting_months", | |
| "network_hospital_count", | |
| "no_claim_bonus_pct", | |
| "copayment_pct", | |
| ): | |
| _add(_sc_int(policy, fld)) | |
| # Insurer CSR (+ year) for the high-CSR strength. | |
| if reviews: | |
| cm = reviews.get("claim_metrics", {}) or {} | |
| _add(cm.get("claim_settlement_ratio_pct")) | |
| yr = str(cm.get("claim_settlement_ratio_year", "") or "") | |
| for m in _NUM_RE.findall(yr): | |
| ok.add(m.replace(",", "")) | |
| return ok | |
| def test_full_catalogue_profile_summary_invariants(profile): | |
| """Every catalogued policy, this profile: shape + no-junk + deductible | |
| gate + non-fabrication + maternity-suppression + determinism.""" | |
| junk_fails: list[str] = [] | |
| shape_fails: list[str] = [] | |
| ded_fails: list[str] = [] | |
| fab_fails: list[str] = [] | |
| mat_fails: list[str] = [] | |
| det_fails: list[str] = [] | |
| caveat_fails: list[str] = [] | |
| has_spouse = bool( | |
| profile | |
| and any( | |
| k in str(profile.get("dependents") or "").lower() | |
| for k in ("spouse", "wife", "husband", "partner") | |
| ) | |
| ) | |
| for pid in _catalogue_ids(): | |
| policy = _resolve_policy(pid) | |
| if policy is None: | |
| continue | |
| reviews = _insurer_reviews(policy) | |
| sc = build_scorecard(policy, insurer_reviews=reviews, profile=profile) | |
| ps = sc.profile_summary | |
| assert ps is not None, f"{pid}: profile_summary is None" | |
| if sc.insufficient_data: | |
| if ps.strengths != [] or ps.caveat is not None: | |
| shape_fails.append(f"{pid}: insufficient β must be empty, got {ps}") | |
| continue | |
| # 1. shape | |
| if not (0 <= len(ps.strengths) <= 5): | |
| shape_fails.append(f"{pid}: {len(ps.strengths)} strengths (must be 0..5)") | |
| # 2. no junk | |
| for b in ps.strengths + ([ps.caveat] if ps.caveat else []): | |
| if not b or not b.strip(): | |
| junk_fails.append(f"{pid}: empty bullet") | |
| if "/100" in b: | |
| junk_fails.append(f"{pid}: '/100' in {b!r}") | |
| if b.strip() in {"null", "None"}: | |
| junk_fails.append(f"{pid}: literal null/None {b!r}") | |
| # A bare standalone grade letter as a whole bullet. | |
| if _GRADE_RE.fullmatch(b.strip()): | |
| junk_fails.append(f"{pid}: standalone grade letter {b!r}") | |
| # 3. deductible gate | |
| ded_strength = any( | |
| "voluntary deductible" in s.lower() for s in ps.strengths | |
| ) | |
| ded_ok = policy_deductible_support(pid)[0] is True | |
| if ded_strength != ded_ok: | |
| ded_fails.append( | |
| f"{pid}: deductible strength={ded_strength} but support={ded_ok}" | |
| ) | |
| # 4. non-fabrication β every numeric token traces to a readable value | |
| readable = _readable_numbers(policy, sc, reviews) | |
| for s in ps.strengths: | |
| for tok in _NUM_RE.findall(s): | |
| norm = tok.replace(",", "") | |
| # tolerate "92.2" matching a stored 92.2 / 92 / "92.2" | |
| cands = {norm, norm.split(".")[0]} | |
| try: | |
| cands.add(f"{float(norm):.1f}") | |
| cands.add(str(int(float(norm)))) | |
| except ValueError: | |
| pass | |
| if not (cands & readable): | |
| fab_fails.append( | |
| f"{pid}: numeric {tok!r} in strength {s!r} not " | |
| f"readable from policy/reviews" | |
| ) | |
| # 5. caveat βΊ the generator's EXACT contract: pick the most | |
| # grade-capping, profile-relevant sub via the SAME profile-tuned | |
| # weights (argmax weights[name]*(100-score)); its FIRST "β "-prefixed | |
| # signal must exist iff the caveat is non-None, and the caveat must | |
| # be a deterministic transform that carries that signal's stripped | |
| # text OR its numeric token (never invented / contradicting). | |
| weights = _profile_tuned_weights(profile) | |
| def _gap(sub): | |
| return weights.get(sub.name, 0.0) * (100 - sub.score) | |
| top = max(sc.sub_scores, key=_gap) if sc.sub_scores else None | |
| first_neg = None | |
| if top is not None: | |
| first_neg = next( | |
| (sig for sig in (top.signals or []) if sig.startswith("β ")), | |
| None, | |
| ) | |
| if first_neg is None: | |
| if ps.caveat is not None: | |
| caveat_fails.append( | |
| f"{pid}: caveat {ps.caveat!r} but top sub " | |
| f"{getattr(top, 'name', None)!r} has NO 'β ' signal" | |
| ) | |
| else: | |
| if ps.caveat is None: | |
| caveat_fails.append( | |
| f"{pid}: top sub has 'β ' signal {first_neg!r} but " | |
| f"caveat is None" | |
| ) | |
| else: | |
| raw = first_neg[2:].strip() | |
| nums = _NUM_RE.findall(raw) | |
| # The caveat is a deterministic plain-language transform of | |
| # exactly THIS signal: it must carry the signal's number(s) | |
| # OR a stable keyword from the signal β never unrelated text. | |
| kws = [ | |
| "ped", "copay", "co-pay", "room rent", "csr", | |
| "claim settlement", "cashless", "network", "initial", | |
| "maternity", "deductible", "day-care", "day care", | |
| ] | |
| rl = raw.lower() | |
| cl = ps.caveat.lower() | |
| traced = ( | |
| (nums and all(n in ps.caveat for n in nums)) | |
| or any(k in rl and k in cl for k in kws) | |
| or raw in ps.caveat # generic "One trade-off: <raw>." | |
| ) | |
| if not traced: | |
| caveat_fails.append( | |
| f"{pid}: caveat {ps.caveat!r} not a transform of the " | |
| f"top sub's first 'β ' signal {first_neg!r}" | |
| ) | |
| # 6. maternity suppressed without spouse/partner | |
| if not has_spouse: | |
| if any("maternity" in s.lower() for s in ps.strengths): | |
| mat_fails.append(f"{pid}: maternity strength without spouse") | |
| # 7. determinism | |
| sc2 = build_scorecard(policy, insurer_reviews=reviews, profile=profile) | |
| if ( | |
| sc2.profile_summary.strengths != ps.strengths | |
| or sc2.profile_summary.caveat != ps.caveat | |
| ): | |
| det_fails.append(f"{pid}: non-deterministic profile_summary") | |
| assert not shape_fails, f"SHAPE ({len(shape_fails)}): {shape_fails[:8]}" | |
| assert not junk_fails, f"JUNK ({len(junk_fails)}): {junk_fails[:8]}" | |
| assert not ded_fails, f"DEDUCTIBLE GATE ({len(ded_fails)}): {ded_fails[:8]}" | |
| assert not fab_fails, f"FABRICATION ({len(fab_fails)}): {fab_fails[:8]}" | |
| assert not caveat_fails, f"CAVEAT ({len(caveat_fails)}): {caveat_fails[:8]}" | |
| assert not mat_fails, f"MATERNITY ({len(mat_fails)}): {mat_fails[:8]}" | |
| assert not det_fails, f"DETERMINISM ({len(det_fails)}): {det_fails[:8]}" | |
| def test_deductible_strength_is_exactly_the_two_known_policies(): | |
| """The voluntary-deductible strength appears for EXACTLY | |
| {bajaj-allianz__health-guard, star-health__star-assure} and no other | |
| catalogued policy (the BUG #29 catalogue-wide invariant).""" | |
| seen: set[str] = set() | |
| for pid in _catalogue_ids(): | |
| policy = _resolve_policy(pid) | |
| if policy is None: | |
| continue | |
| sc = build_scorecard( | |
| policy, insurer_reviews=_insurer_reviews(policy), profile=None | |
| ) | |
| ps = sc.profile_summary | |
| if ps and any("voluntary deductible" in s.lower() for s in ps.strengths): | |
| seen.add(pid) | |
| assert seen == _DEDUCTIBLE_POLICIES, ( | |
| f"deductible strength leaked / missing: got {sorted(seen)}, " | |
| f"expected {sorted(_DEDUCTIBLE_POLICIES)}" | |
| ) | |
| def test_maternity_appears_when_spouse_present_and_policy_covers_it(): | |
| """Positive control for the maternity-suppression rule: with a spouse on | |
| the profile, at least one maternity-covering policy DOES surface a | |
| maternity strength (so the rule isn't just always-off).""" | |
| spouse_profile = { | |
| "age": 32, "dependents": "self+spouse", "income_band": "10L-25L", | |
| "primary_goal": "first_buy", "location_tier": "metro", | |
| "health_conditions": [], | |
| } | |
| found = False | |
| for pid in _catalogue_ids(): | |
| policy = _resolve_policy(pid) | |
| if policy is None: | |
| continue | |
| sc = build_scorecard( | |
| policy, insurer_reviews=_insurer_reviews(policy), | |
| profile=spouse_profile, | |
| ) | |
| ps = sc.profile_summary | |
| if ps and any("maternity" in s.lower() for s in ps.strengths): | |
| found = True | |
| break | |
| assert found, ( | |
| "no maternity strength surfaced for ANY policy even with a spouse " | |
| "on the profile β suppression rule is stuck off" | |
| ) | |
| def test_insufficient_data_yields_empty_profile_summary(): | |
| """A genuinely-bare policy takes the defined honest path and the | |
| profile_summary is the empty form (caller falls back to one_liner).""" | |
| sc = build_scorecard( | |
| {"policy_id": "x__bare", "policy_name": "Bare", "insurer_slug": "x"} | |
| ) | |
| assert sc.insufficient_data is True | |
| assert sc.profile_summary is not None | |
| assert sc.profile_summary.strengths == [] | |
| assert sc.profile_summary.caveat is None | |
| def test_copay_preference_tag_only_when_user_states_zero_copay(): | |
| """The '(your stated preference)' qualifier on the zero-co-pay strength | |
| appears IFF the profile carries copay_pct == 0 β deterministic, derived, | |
| never fabricated.""" | |
| # bajaj-allianz__health-guard has copayment_pct == 0 in curated facts. | |
| policy = _resolve_policy("star-health__star-assure") | |
| assert policy is not None | |
| rv = _insurer_reviews(policy) | |
| no_pref = build_scorecard(policy, insurer_reviews=rv, profile={ | |
| "age": 30, "dependents": "self", "income_band": "10L-25L", | |
| "primary_goal": "first_buy", "location_tier": "metro", | |
| "health_conditions": [], | |
| }).profile_summary | |
| with_pref = build_scorecard(policy, insurer_reviews=rv, profile={ | |
| "age": 30, "dependents": "self", "income_band": "10L-25L", | |
| "primary_goal": "first_buy", "location_tier": "metro", | |
| "health_conditions": [], "copay_pct": 0, | |
| }).profile_summary | |
| zc_no = [s for s in no_pref.strengths if "co-payment" in s.lower()] | |
| zc_yes = [s for s in with_pref.strengths if "co-payment" in s.lower()] | |
| # star-assure has copayment_pct of 10 (mandatory) β no zero-copay | |
| # strength either way; assert the qualifier never appears WITHOUT the | |
| # preference, and that the tag string is exclusively preference-gated. | |
| assert all("your stated preference" not in s for s in zc_no), zc_no | |
| for s in with_pref.strengths: | |
| if "your stated preference" in s: | |
| assert "co-payment" in s.lower() | |
| if __name__ == "__main__": | |
| raise SystemExit(pytest.main([__file__, "-rA", "-p", "no:warnings"])) | |