InsuranceBot / tests /test_profile_summary.py
rohitsar567's picture
feat(#31): deterministic profile-aware {strengths,caveat} policy summary on all 3 surfaces
d486a56
Raw
History Blame Contribute Delete
17.8 kB
"""Task #31 β€” deterministic, profile-aware {strengths, caveat} guard.
WHAT THIS LOCKS DOWN
--------------------
`backend.scorecard.build_profile_summary` replaces the generic grade
one-liner with a PER (profile × policy) list of concrete strengths + the
single most grade-capping trade-off, computed on the SAME pass as the grade.
The non-negotiable invariants this test pins for the FULL 148-policy
catalogue, across 6 representative profiles (profile-neutral + 5 concrete):
1. Shape: 0 ≤ len(strengths) ≤ 5. ≥3 whenever ≥3 qualifying facts exist;
   fewer ONLY when the policy genuinely has fewer real facts (never padded).
   insufficient-data ⇒ strengths == [] (caller falls back to one_liner).
2. No junk: no empty bullet, no "/100", no standalone letter grade, no
   literal "null"/"None".
3. Deductible bullet present IFF
   premium_calculator.policy_deductible_support(pid)[0] is True — across
   the catalogue that is EXACTLY {bajaj-allianz__health-guard,
   star-health__star-assure} (BUG #29 invariant).
4. Non-fabrication: every numeric token in every strength is regex-
   extractable AND traces to a value the scorecard's OWN helpers can read
   off that policy / insurer_reviews (a strength can never assert a number
   the grade didn't see).
5. Caveat is None OR derived from a "− "-prefixed (U+2212 + space) signal
   literally present in that scorecard's sub.signals (never invented /
   contradicting).
6. Maternity strength absent when no spouse/partner on the profile.
7. Determinism: same (policy, profile) twice ⇒ byte-identical.
"""
from __future__ import annotations
import json
import re
import sys
from pathlib import Path
import pytest
_REPO_ROOT = Path(__file__).resolve().parent.parent
if str(_REPO_ROOT) not in sys.path:
sys.path.insert(0, str(_REPO_ROOT))
import backend.main as M # noqa: E402
from backend.config import settings # noqa: E402
from backend.premium_calculator import policy_deductible_support # noqa: E402
from backend.scorecard import ( # noqa: E402
_profile_tuned_weights,
build_profile_summary,
build_scorecard,
)
# Catalogue-wide set of policies that support a voluntary deductible
# (BUG #29). A voluntary-deductible strength claimed by any other policy is a
# fabrication.
_DEDUCTIBLE_POLICIES = {
    "star-health__star-assure",
    "bajaj-allianz__health-guard",
}

# Six parametrisation entries: the profile-neutral case (None) plus five
# concrete profiles chosen to span the weight-tuner's branches.
_PROFILES = [
    # Profile-neutral scoring.
    None,
    # Young, healthy, first-time buyer, tax goal, metro.
    {
        "age": 28,
        "dependents": "self",
        "income_band": "10L-25L",
        "primary_goal": "tax_planning",
        "location_tier": "metro",
        "health_conditions": [],
        "existing_cover_inr": 0,
        "copay_pct": 0,
    },
    # Senior + spouse + diabetes + family history + already has cover.
    {
        "age": 58,
        "dependents": "self+spouse",
        "income_band": "25L+",
        "primary_goal": "upgrade",
        "location_tier": "metro",
        "health_conditions": ["diabetes"],
        "family_medical_history": ["heart"],
        "existing_cover_inr": 500000,
        "parents_to_insure": False,
    },
    # Family with kids, tier-3 city, tight budget.
    {
        "age": 41,
        "dependents": "self+spouse+kids",
        "income_band": "5L-10L",
        "primary_goal": "first_buy",
        "location_tier": "tier3",
        "health_conditions": [],
        "budget_band": "under_15k",
        "existing_cover_inr": 0,
    },
    # Parents to insure, parents have a pre-existing disease.
    {
        "age": 35,
        "dependents": "self+parents",
        "income_band": "10L-25L",
        "primary_goal": "compare_specific",
        "location_tier": "tier2",
        "health_conditions": [],
        "parents_to_insure": True,
        "parents_age_max": 68,
        "parents_has_ped": True,
    },
    # Mid-age, hypertension, co-pay tolerance stated.
    {
        "age": 47,
        "dependents": "self+spouse",
        "income_band": "25L+",
        "primary_goal": "upgrade",
        "location_tier": "metro",
        "health_conditions": ["hypertension"],
        "copay_pct": 0,
        "existing_cover_inr": 1000000,
    },
]

# A numeric token: a digit, then any run of digits/commas, then an optional
# decimal part. Note a trailing comma directly after the digits is captured.
_NUM_RE = re.compile(r"\d[\d,]*(?:\.\d+)?")

# A single letter grade standing on its own word boundaries.
_GRADE_RE = re.compile(r"\b[ABCDF]\b")
def _catalogue_ids() -> list[str]:
    """Return the ``policy_id`` of every card in the marketplace catalogue.

    The catalogue is obtained from ``M._marketplace_catalogue(None)``. As a
    guard against a silently collapsed catalogue, the call asserts that more
    than 100 cards came back before any ids are returned.
    """
    catalogue = M._marketplace_catalogue(None)
    assert len(catalogue) > 100, f"catalogue collapsed: {len(catalogue)}"
    policy_ids = [card.policy_id for card in catalogue]
    return policy_ids
def _resolve_policy(pid: str) -> dict | None:
    """Resolve a card id → policy dict EXACTLY like /api/policies/{id}/scorecard.

    Resolution order:

    1. If ``settings.EXTRACTED_DIR / "<pid>.json"`` exists, it is loaded and
       merged (via ``M._merge_curated``) with the curated facts keyed by the
       file's own ``policy_id`` (falling back to ``pid``). A file that cannot
       be read, decoded or parsed resolves to ``None``.
    2. Otherwise the curated facts alone are consulted, trying ``pid`` and
       then the ``__wordings`` / ``__cis`` / ``__brochure`` / ``__prospectus``
       suffixed keys in that order. The match is shallow-copied and given a
       ``policy_id`` if it lacks one.

    Returns:
        The resolved policy dict, or ``None`` when nothing resolves.
    """
    curated = M._load_curated_facts()
    extracted_path = settings.EXTRACTED_DIR / f"{pid}.json"
    if extracted_path.exists():
        try:
            # JSON is UTF-8; decode explicitly instead of relying on the
            # platform's locale default, which would garble non-ASCII text.
            policy = json.loads(extracted_path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # OSError: unreadable file. ValueError covers both
            # UnicodeDecodeError and json.JSONDecodeError (its subclasses).
            return None
        return M._merge_curated(
            policy, curated.get(policy.get("policy_id", pid)) or curated.get(pid)
        )

    policy = (
        curated.get(pid)
        or curated.get(f"{pid}__wordings")
        or curated.get(f"{pid}__cis")
        or curated.get(f"{pid}__brochure")
        or curated.get(f"{pid}__prospectus")
    )
    if not policy:
        return None
    # Copy before mutating so the shared curated-facts mapping is untouched.
    policy = dict(policy)
    policy.setdefault("policy_id", pid)
    return policy
def _insurer_reviews(policy: dict) -> dict | None:
    """Load the insurer review data for ``policy``, if any is available.

    The file is looked up at
    ``settings.DATA_DIR / "reviews" / "<insurer_slug>.json"``.

    Returns:
        The parsed JSON content, or ``None`` when the policy has no
        ``insurer_slug``, the file does not exist, or it cannot be read,
        decoded or parsed.
    """
    slug = policy.get("insurer_slug")
    if not slug:
        return None
    reviews_path = settings.DATA_DIR / "reviews" / f"{slug}.json"
    if not reviews_path.exists():
        return None
    try:
        # Decode as UTF-8 explicitly rather than with the locale default.
        return json.loads(reviews_path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError and json.JSONDecodeError.
        return None
def _readable_numbers(policy: dict, sc, reviews: dict | None) -> set[str]:
    """Collect every numeric string a strength may LEGITIMATELY quote.

    This is the non-fabrication anchor: a numeric token in a strength that is
    not in the returned set is treated by the caller as a fabrication.

    The set is the union of:

    * every numeric token found in the signals of ``sc.sub_scores``;
    * selected policy fields, each read through ``backend.scorecard._int``;
    * the insurer's claim-settlement ratio and the numeric tokens of its
      year, taken from ``reviews["claim_metrics"]`` when reviews are given.

    Values are stored comma-free; numeric values are additionally stored in
    integer and one-decimal form so differently formatted quotes still match.
    """
    from backend.scorecard import _int as _sc_int

    allowed: set[str] = set()

    def _register(value):
        # Record the value's comma-free text and, when it parses as a number,
        # its integer and one-decimal renderings as well.
        if value is None:
            return
        allowed.add(str(value).replace(",", ""))
        try:
            as_float = float(value)
            whole, one_decimal = str(int(as_float)), f"{as_float:.1f}"
        except (TypeError, ValueError):
            return
        allowed.update((whole, one_decimal))

    # Numbers the scorecard surfaced in its own signals.
    allowed.update(
        token.replace(",", "")
        for sub in sc.sub_scores
        for signal in sub.signals
        for token in _NUM_RE.findall(signal)
    )

    # Policy fields, read through the scorecard's own integer helper.
    helper_fields = (
        "deductible_amount",
        "max_entry_age",
        "pre_existing_disease_waiting_months",
        "maternity_waiting_months",
        "network_hospital_count",
        "no_claim_bonus_pct",
        "copayment_pct",
    )
    for field_name in helper_fields:
        _register(_sc_int(policy, field_name))

    # Insurer claim-settlement ratio (+ year) for the high-CSR strength.
    if reviews:
        metrics = reviews.get("claim_metrics", {}) or {}
        _register(metrics.get("claim_settlement_ratio_pct"))
        year_text = str(metrics.get("claim_settlement_ratio_year", "") or "")
        allowed.update(
            token.replace(",", "") for token in _NUM_RE.findall(year_text)
        )

    return allowed
# Prefix that marks a negative (grade-capping) scorecard signal: U+2212 MINUS
# SIGN followed by one space. Written as an escape so the comparison cannot be
# corrupted by a re-encoding of this file.
_NEG_SIGNAL_PREFIX = "\u2212 "

# Stable keywords a caveat may share with the signal it was derived from.
_CAVEAT_KEYWORDS = (
    "ped", "copay", "co-pay", "room rent", "csr",
    "claim settlement", "cashless", "network", "initial",
    "maternity", "deductible", "day-care", "day care",
)


@pytest.mark.parametrize(
    "profile",
    _PROFILES,
    ids=lambda p: "neutral" if p is None else f"{p.get('age')}/{p.get('dependents')}",
)
def test_full_catalogue_profile_summary_invariants(profile):
    """Every catalogued policy, this profile: shape + no-junk + deductible
    gate + non-fabrication + caveat tracing + maternity-suppression +
    determinism.

    Violations are collected per category across the whole catalogue and
    asserted at the end, so one run reports every offending policy (the first
    8 per category) instead of stopping at the first.
    """
    junk_fails: list[str] = []
    shape_fails: list[str] = []
    ded_fails: list[str] = []
    fab_fails: list[str] = []
    mat_fails: list[str] = []
    det_fails: list[str] = []
    caveat_fails: list[str] = []

    has_spouse = bool(
        profile
        and any(
            k in str(profile.get("dependents") or "").lower()
            for k in ("spouse", "wife", "husband", "partner")
        )
    )

    for pid in _catalogue_ids():
        policy = _resolve_policy(pid)
        if policy is None:
            continue
        reviews = _insurer_reviews(policy)
        sc = build_scorecard(policy, insurer_reviews=reviews, profile=profile)
        ps = sc.profile_summary
        assert ps is not None, f"{pid}: profile_summary is None"

        if sc.insufficient_data:
            # Honest path: nothing to summarise, caller falls back to one_liner.
            if ps.strengths != [] or ps.caveat is not None:
                shape_fails.append(f"{pid}: insufficient ⇒ must be empty, got {ps}")
            continue

        # 1. shape
        if not (0 <= len(ps.strengths) <= 5):
            shape_fails.append(f"{pid}: {len(ps.strengths)} strengths (must be 0..5)")

        # 2. no junk
        for b in ps.strengths + ([ps.caveat] if ps.caveat else []):
            if not b or not b.strip():
                junk_fails.append(f"{pid}: empty bullet")
            if "/100" in b:
                junk_fails.append(f"{pid}: '/100' in {b!r}")
            if b.strip() in {"null", "None"}:
                junk_fails.append(f"{pid}: literal null/None {b!r}")
            # A bare standalone grade letter as a whole bullet.
            if _GRADE_RE.fullmatch(b.strip()):
                junk_fails.append(f"{pid}: standalone grade letter {b!r}")

        # 3. deductible gate
        ded_strength = any(
            "voluntary deductible" in s.lower() for s in ps.strengths
        )
        ded_ok = policy_deductible_support(pid)[0] is True
        if ded_strength != ded_ok:
            ded_fails.append(
                f"{pid}: deductible strength={ded_strength} but support={ded_ok}"
            )

        # 4. non-fabrication — every numeric token traces to a readable value
        readable = _readable_numbers(policy, sc, reviews)
        for s in ps.strengths:
            for tok in _NUM_RE.findall(s):
                norm = tok.replace(",", "")
                # tolerate "92.2" matching a stored 92.2 / 92 / "92.2"
                cands = {norm, norm.split(".")[0]}
                try:
                    cands.add(f"{float(norm):.1f}")
                    cands.add(str(int(float(norm))))
                except ValueError:
                    pass
                if not (cands & readable):
                    fab_fails.append(
                        f"{pid}: numeric {tok!r} in strength {s!r} not "
                        f"readable from policy/reviews"
                    )

        # 5. caveat ⟺ the generator's EXACT contract: pick the most
        #    grade-capping, profile-relevant sub via the SAME profile-tuned
        #    weights (argmax weights[name]*(100-score)); its FIRST
        #    minus-prefixed signal must exist iff the caveat is non-None, and
        #    the caveat must be a deterministic transform that carries that
        #    signal's stripped text OR its numeric token (never invented /
        #    contradicting).
        weights = _profile_tuned_weights(profile)
        top = (
            max(
                sc.sub_scores,
                key=lambda sub: weights.get(sub.name, 0.0) * (100 - sub.score),
            )
            if sc.sub_scores
            else None
        )
        first_neg = None
        if top is not None:
            first_neg = next(
                (
                    sig
                    for sig in (top.signals or [])
                    if sig.startswith(_NEG_SIGNAL_PREFIX)
                ),
                None,
            )
        if first_neg is None:
            if ps.caveat is not None:
                caveat_fails.append(
                    f"{pid}: caveat {ps.caveat!r} but top sub "
                    f"{getattr(top, 'name', None)!r} has NO "
                    f"{_NEG_SIGNAL_PREFIX!r} signal"
                )
        elif ps.caveat is None:
            caveat_fails.append(
                f"{pid}: top sub has {_NEG_SIGNAL_PREFIX!r} signal "
                f"{first_neg!r} but caveat is None"
            )
        else:
            # Drop exactly the prefix, whatever its length, then trim.
            raw = first_neg[len(_NEG_SIGNAL_PREFIX):].strip()
            nums = _NUM_RE.findall(raw)
            # The caveat is a deterministic plain-language transform of
            # exactly THIS signal: it must carry the signal's number(s)
            # OR a stable keyword from the signal — never unrelated text.
            rl = raw.lower()
            cl = ps.caveat.lower()
            traced = (
                (nums and all(n in ps.caveat for n in nums))
                or any(k in rl and k in cl for k in _CAVEAT_KEYWORDS)
                or raw in ps.caveat  # generic "One trade-off: <raw>."
            )
            if not traced:
                caveat_fails.append(
                    f"{pid}: caveat {ps.caveat!r} not a transform of the "
                    f"top sub's first {_NEG_SIGNAL_PREFIX!r} signal "
                    f"{first_neg!r}"
                )

        # 6. maternity suppressed without spouse/partner
        if not has_spouse:
            if any("maternity" in s.lower() for s in ps.strengths):
                mat_fails.append(f"{pid}: maternity strength without spouse")

        # 7. determinism
        sc2 = build_scorecard(policy, insurer_reviews=reviews, profile=profile)
        if (
            sc2.profile_summary.strengths != ps.strengths
            or sc2.profile_summary.caveat != ps.caveat
        ):
            det_fails.append(f"{pid}: non-deterministic profile_summary")

    assert not shape_fails, f"SHAPE ({len(shape_fails)}): {shape_fails[:8]}"
    assert not junk_fails, f"JUNK ({len(junk_fails)}): {junk_fails[:8]}"
    assert not ded_fails, f"DEDUCTIBLE GATE ({len(ded_fails)}): {ded_fails[:8]}"
    assert not fab_fails, f"FABRICATION ({len(fab_fails)}): {fab_fails[:8]}"
    assert not caveat_fails, f"CAVEAT ({len(caveat_fails)}): {caveat_fails[:8]}"
    assert not mat_fails, f"MATERNITY ({len(mat_fails)}): {mat_fails[:8]}"
    assert not det_fails, f"DETERMINISM ({len(det_fails)}): {det_fails[:8]}"
def test_deductible_strength_is_exactly_the_two_known_policies():
    """Profile-neutral scan of the catalogue: the set of policies whose
    summary carries a voluntary-deductible strength must equal
    ``_DEDUCTIBLE_POLICIES`` exactly, with no leaks and no omissions (the
    BUG #29 catalogue-wide invariant)."""
    flagged: set[str] = set()
    for policy_id in _catalogue_ids():
        resolved = _resolve_policy(policy_id)
        if resolved is None:
            continue
        scorecard = build_scorecard(
            resolved, insurer_reviews=_insurer_reviews(resolved), profile=None
        )
        summary = scorecard.profile_summary
        if not summary:
            continue
        mentions_deductible = any(
            "voluntary deductible" in strength.lower()
            for strength in summary.strengths
        )
        if mentions_deductible:
            flagged.add(policy_id)

    expected = _DEDUCTIBLE_POLICIES
    assert flagged == expected, (
        f"deductible strength leaked / missing: got {sorted(flagged)}, "
        f"expected {sorted(expected)}"
    )
def test_maternity_appears_when_spouse_present_and_policy_covers_it():
    """Positive control for the maternity-suppression rule: with a spouse on
    the profile, at least one catalogued policy DOES surface a maternity
    strength (so the rule isn't just always-off).

    The scan stops at the first policy that surfaces one.
    """
    spouse_profile = {
        "age": 32, "dependents": "self+spouse", "income_band": "10L-25L",
        "primary_goal": "first_buy", "location_tier": "metro",
        "health_conditions": [],
    }
    found = False
    for pid in _catalogue_ids():
        policy = _resolve_policy(pid)
        if policy is None:
            continue
        sc = build_scorecard(
            policy,
            insurer_reviews=_insurer_reviews(policy),
            profile=spouse_profile,
        )
        ps = sc.profile_summary
        if ps and any("maternity" in s.lower() for s in ps.strengths):
            found = True
            break
    assert found, (
        "no maternity strength surfaced for ANY policy even with a spouse "
        "on the profile — suppression rule is stuck off"
    )
def test_insufficient_data_yields_empty_profile_summary():
    """A policy carrying only id, name and insurer slug must be flagged
    insufficient-data and still expose a profile_summary object in its empty
    form: no strengths, no caveat (caller falls back to one_liner)."""
    bare_policy = {
        "policy_id": "x__bare",
        "policy_name": "Bare",
        "insurer_slug": "x",
    }
    scorecard = build_scorecard(bare_policy)
    assert scorecard.insufficient_data is True
    assert scorecard.profile_summary is not None
    assert scorecard.profile_summary.strengths == []
    assert scorecard.profile_summary.caveat is None
def test_copay_preference_tag_only_when_user_states_zero_copay():
    """The '(your stated preference)' qualifier is exclusively gated on the
    profile carrying copay_pct == 0 — deterministic, derived, never
    fabricated.

    Two invariants are asserted for each reference policy:

    * without a co-pay preference on the profile, no co-payment strength
      carries the qualifier;
    * with the preference, the qualifier only ever appears on a co-payment
      strength.

    Both hold whether or not the policy surfaces a zero-co-pay strength, so
    the test runs on two policies rather than one.
    """
    tag = "your stated preference"
    base_profile = {
        "age": 30, "dependents": "self", "income_band": "10L-25L",
        "primary_goal": "first_buy", "location_tier": "metro",
        "health_conditions": [],
    }
    # NOTE(review): per the original comments, health-guard has
    # copayment_pct == 0 in curated facts (so the tag can actually appear)
    # while star-assure has a mandatory 10% co-pay (so it never should) —
    # confirm against the curated data.
    for pid in ("bajaj-allianz__health-guard", "star-health__star-assure"):
        policy = _resolve_policy(pid)
        assert policy is not None, f"{pid}: could not resolve policy"
        rv = _insurer_reviews(policy)

        # Fresh dicts per call so no state can leak between scorecards.
        no_pref = build_scorecard(
            policy, insurer_reviews=rv, profile=dict(base_profile)
        ).profile_summary
        with_pref = build_scorecard(
            policy, insurer_reviews=rv, profile={**base_profile, "copay_pct": 0}
        ).profile_summary

        # The qualifier must never appear WITHOUT the stated preference.
        zc_no = [s for s in no_pref.strengths if "co-payment" in s.lower()]
        assert all(tag not in s for s in zc_no), (pid, zc_no)

        # With the preference, the tag is confined to co-payment strengths.
        for s in with_pref.strengths:
            if tag in s:
                assert "co-payment" in s.lower(), (pid, s)
# Allow running this file directly: hand it to pytest and exit with pytest's
# return code.
if __name__ == "__main__":
    pytest_args = [__file__, "-rA", "-p", "no:warnings"]
    exit_code = pytest.main(pytest_args)
    raise SystemExit(exit_code)