"""Guards for the premium correctness work (#38 + #36-B + #37b).

Pins the bugs the user surfaced 2026-05-18:
  - canonical-match: doctype-suffixed ids reach their real sample
  - sample sanity guard: a bad curated sample (SBI Arogya Supreme
    brochure-extract, ~₹10k/L) can NEVER emit an absurd premium
  - sample normalization (#38): floater-priced samples are NOT
    double-counted by the floater multiplier
  - attribute model (#36-B): quote-less policies are differentiated by
    product TYPE — a top-up is not priced like a comprehensive plan
  - provenance label (#37b): sample-anchored vs modelled is explicit
  - REGRESSION: the sample-anchored policies that were already sane must
    stay sane (no swing) when none of the above misfires.
"""
from __future__ import annotations

import sys
from pathlib import Path

# Make the directory two levels above this file importable so that the
# `backend` package resolves when the file is run directly.
_REPO = Path(__file__).resolve().parent.parent
if str(_REPO) not in sys.path:
    sys.path.insert(0, str(_REPO))

from backend.premium_calculator import (  # noqa: E402
    _attribute_base_factor,
    estimate,
)

# Baseline keyword arguments forwarded to estimate() by every test below.
PROFILE = {
    "age": 34,
    "sum_insured_inr": 1_200_000,
    "city_tier": "metro",
    "smoker": False,
    "family_size": 3,
    "pre_existing_conditions": "none",
    "copayment_pct": 0.0,
}


def _pt(pid, **over):
    """Run estimate() for policy ``pid`` using PROFILE.

    Keyword arguments in ``over`` replace the PROFILE entries of the
    same name for this call only; PROFILE itself is not modified.
    """
    call_kwargs = {**PROFILE, **over}
    return estimate(policy_id=pid, **call_kwargs)


# --- attribute base factor: type-differentiated, comprehensive == 1.0 ----
def test_attribute_factor_by_type():
    """Each id (and None) must map to its pinned base factor."""
    # Post-rebuild factors via the real-fact-aware _policy_product_type
    # classifier (synthetic ids fall back to id keywords).
    pinned_factors = (
        ("x-insurer__no-such-comprehensive-plan", 1.0),
        ("acko__x-super-top-up__wordings", 0.32),
        ("x__hospital-cash", 0.30),
        ("aditya-birla__activ-secure-cancer-secure", 0.50),
        ("acko__arogya-sanjeevani", 0.70),
        (None, 1.0),
    )
    for policy_id, factor in pinned_factors:
        assert _attribute_base_factor(policy_id) == factor


# --- #38/#44: SBI bad data REPLACED by real harvested samples ----------
def test_sbi_now_real_anchored_and_sane():
    """SBI Arogya Supreme must be sample-anchored and inside the sane band."""
    # The bad brochure-extract was physically replaced by 2 real official
    # SBI rate-chart figures (UIN SBIHLIP21043V012122) + unquarantined.
    # It must now be sample-anchored AND sane (never the ₹146,800 absurd).
    quote = _pt("sbi-general__arogya-supreme__brochure")
    assert quote.base_sample_used is not None, "SBI should now use its real harvested sample"
    premium = quote.point_estimate_inr
    assert 3_000 < premium < 60_000, f"SBI out of sane band: ₹{premium:,}"


# --- #38 regression: real-sample policies stay sample-anchored & sane ---
def test_sample_anchored_policies_not_regressed():
    """Previously sane sample-anchored policies keep their sample and band."""
    # (policy id, exclusive lower bound, exclusive upper bound) in INR.
    sane_bands = (
        ("icici-lombard__elevate__brochure", 8_000, 60_000),
        ("hdfc-ergo__optima-secure__wordings", 6_000, 50_000),
        ("aditya-birla__group-activ-health__wordings", 6_000, 50_000),
    )
    for policy_id, floor, ceiling in sane_bands:
        quote = _pt(policy_id)
        assert quote.base_sample_used is not None, f"{policy_id} lost its real sample"
        premium = quote.point_estimate_inr
        assert floor < premium < ceiling, (
            f"{policy_id} swung out of sane band: ₹{premium:,}"
        )


def test_legit_topup_sample_preserved_cheap():
    """A top-up with a real sample stays sample-anchored and under ₹15,000."""
    quote = _pt("royal-sundaram__advanced-top-up__brochure", sum_insured_inr=4_500_000)
    assert quote.base_sample_used is not None
    premium = quote.point_estimate_inr
    assert premium < 15_000, f"legit top-up sample broken: ₹{premium:,}"


# --- #36-B: quote-less policies of different TYPE must NOT collide ------
def test_quoteless_types_do_not_collide():
    """Comprehensive, top-up and cancer plans must not share one price."""
    price_comprehensive = _pt("royal-sundaram__family-plus__cis").point_estimate_inr
    price_topup = _pt(
        "acko__acko-health-iii-platinum-super-top-up__wordings"
    ).point_estimate_inr
    price_cancer = _pt(
        "aditya-birla__activ-secure-cancer-secure__brochure"
    ).point_estimate_inr

    assert price_comprehensive != price_topup, "top-up priced same as comprehensive (collision)"
    assert price_comprehensive != price_cancer, "cancer plan priced same as comprehensive"
    assert price_topup < price_comprehensive, "top-up must be materially cheaper"


# --- #37b: provenance label is explicit and correct --------------------
def test_provenance_label_distinguishes_sample_vs_model():
    """Methodology text must differ between sample-anchored and modelled."""
    anchored = _pt("icici-lombard__elevate__brochure")
    assert anchored.base_sample_used is not None
    assert "public quote we collected" in anchored.methodology

    # A policy with no curated sample AND no extraction → model path
    # (royal-sundaram__family-plus now HAS a real harvested sample, so use
    # a synthetic id that can never resolve to a sample).
    modelled = _pt("nonexistent-insurer__no-such-plan-zzz__wordings")
    assert modelled.base_sample_used is None
    label = modelled.methodology
    assert "Modelled" in label
    assert "not a quote" in label


if __name__ == "__main__":
    import pytest

    raise SystemExit(pytest.main([__file__, "-v"]))