InsuranceBot / tests /test_sum_insured_rationalisation.py
Claude Code
fix(pricing+scoring): rebuild premium engine to 0 logical violations + classification, faithfulness, links, writing
f266901
Raw
History Blame Contribute Delete
7.46 kB
"""SI RATIONALISATION (D1/D2/D3, 2026-05-16).
Pins the three user-approved decisions:
D1 — DISPLAY: continuous "₹X – ₹Y" band ONLY for a genuine continuous
offering; otherwise discrete corroborated tiers; "As per policy
schedule" when nothing corroborates.
D2 — PREMIUM CLAMP: the global ₹5 L / ₹1 Cr clamp is REMOVED; a policy
with no published SI prices against desired_sum_insured_inr (else
₹10 L) and surfaces the verbatim disclosure.
D3 — SOURCE-QUOTE CORROBORATION: deterministically drop every SI value
the field's own source_quote does not genuinely state. No LLM, no
fabrication.
"""
from backend.sum_insured import (
corroborate,
corroborated_values,
classify,
rationalise,
)
from backend.premium_calculator import (
resolve_profile_sum_insured,
fallback_sum_insured_for_unpublished,
unpublished_si_disclosure,
)
# ---------------------------------------------------------------------------
# D3 — source-quote corroboration (deterministic, no fabrication)
# ---------------------------------------------------------------------------
def test_inline_lakh_and_crore_units_corroborate():
# "3L, 5L, 10L" → ₹3/5/10 L all corroborate; a value the quote doesn't
# state (₹4 L) is dropped.
q = "options of SI of ₹ 3L, 5L, 10L."
assert corroborate([300_000, 400_000, 500_000, 1_000_000], q) == [
300_000, 500_000, 1_000_000
]
# "1 Crore" → ₹1 Cr corroborates.
assert 10_000_000 in corroborated_values("Maximum Sum insured of ` 1 Crore")
def test_header_unit_governs_a_bare_list():
# "(Rs. in Lakhs) 3.00 5.00 10.00 …" — the unit label governs the whole
# list; each bare number is a lakh figure and corroborates.
q = "Basic Sum Insured (Rs. in Lakh) 3.00 5.00 10.00 15.00 20.00, 25.00, 50.00"
kept = corroborate([300_000, 500_000, 1_000_000, 1_500_000, 5_000_000], q)
assert kept == [300_000, 500_000, 1_000_000, 1_500_000, 5_000_000]
def test_indian_grouping_is_collapsed_but_decimal_list_is_not():
# Indian grouping joins into one number…
assert 500_000 in corroborated_values("Sum Insured of Rs 5,00,000")
# …but a decimal list separated by comma-space is NOT mis-joined.
q = "Sum insured (₹) Lakhs 0.5, 1, 1.5, 2, 2.5, 3"
kept = corroborate([50_000, 100_000, 150_000, 200_000, 250_000, 300_000], q)
assert kept == [50_000, 100_000, 150_000, 200_000, 250_000, 300_000]
def test_uncorroborated_values_are_dropped_completely():
# A daily-cash product: "₹ 500, 1000, 2000…" are per-day amounts, not SI
# — no lakh/crore unit, all sub-₹10k → nothing corroborates.
q = "Daily Cash Amount of ₹ 500, 1000, 2000, 3000, 5000 per day"
assert corroborate([500, 1000, 2000, 3000, 5000], q) == []
# Truncated quote with no figures → nothing corroborates.
assert corroborate([500_000, 1_000_000], "Sum Insured options range from Rs") == []
def test_no_fabrication_only_confirms_listed_values():
# The quote mentions ₹50 L but the policy never lists it → it is NOT
# added. corroborate() can only ever return a subset of the input list.
q = "Sum Insured 3 lacs to 50 lacs"
out = corroborate([300_000, 1_000_000], q)
assert set(out).issubset({300_000, 1_000_000})
assert 5_000_000 not in out
# ---------------------------------------------------------------------------
# D1 — band vs tiers vs none classification + display contract
# ---------------------------------------------------------------------------
def test_band_requires_range_language_and_wide_spread():
# Range language + wide corroborated spread → band.
assert classify([50_000, 1_000_000], "₹ 50,000 to ₹ 10L, in multiple of ₹ 50,000") == "band"
# Range language but narrow spread → still discrete tiers.
assert classify([300_000, 500_000], "Sum Insured 3 lacs to 5 lacs") == "tiers"
# No range language → discrete tiers even if wide.
assert classify([300_000, 7_500_000], "3 lacs / 75 lacs") == "tiers"
# Nothing corroborated → none.
assert classify([], "anything") == "none"
def test_rationalise_packages_the_display_view():
band = rationalise([50_000, 1_000_000], "₹ 50,000 to ₹ 10L, in multiples of ₹ 50,000")
assert band.kind == "band" and band.is_band
assert band.min_inr == 50_000 and band.max_inr == 1_000_000
tiers = rationalise([300_000, 750_000, 1_000_000],
"Sum Insured 3 lacs to 7.5 lacs and 10 Lacs")
assert tiers.kind == "tiers" and not tiers.is_band
assert tiers.tiers == [300_000, 750_000, 1_000_000]
none = rationalise([500_000], "Sum Insured options range from Rs")
assert none.kind == "none"
assert none.min_inr is None and none.max_inr is None and none.tiers == []
# ---------------------------------------------------------------------------
# D2 — clamp removal + unpublished-SI fallback + verbatim disclosure
# ---------------------------------------------------------------------------
def test_clamp_is_removed_real_target_is_priced():
# Was clamped to ₹1 Cr / ₹5 L respectively pre-D2.
assert resolve_profile_sum_insured({"desired_sum_insured_inr": 20_000_000}) == 20_000_000
assert resolve_profile_sum_insured({"desired_sum_insured_inr": 100_000}) == 100_000
# Grid snap still applies.
assert resolve_profile_sum_insured({"desired_sum_insured_inr": 1_234_000}) == 1_250_000
def test_unpublished_si_fallback_precedence():
# desired_sum_insured_inr wins when present…
assert fallback_sum_insured_for_unpublished({"desired_sum_insured_inr": 2_500_000}) == 2_500_000
# …else the ₹10 L default (D2 spec).
assert fallback_sum_insured_for_unpublished({}) == 1_000_000
assert fallback_sum_insured_for_unpublished(None) == 1_000_000
assert fallback_sum_insured_for_unpublished({"desired_sum_insured_inr": 0}) == 1_000_000
def test_disclosure_string_is_verbatim():
assert unpublished_si_disclosure(1_000_000) == (
"This plan does not publish its sum insured, so the estimate is shown for ₹10 L cover."
)
assert unpublished_si_disclosure(2_500_000) == (
"This plan does not publish its sum insured, so the estimate is shown for ₹25 L cover."
)
assert unpublished_si_disclosure(15_000_000) == (
"This plan does not publish its sum insured, so the estimate is shown for ₹1.5 Cr cover."
)
# ---------------------------------------------------------------------------
# End-to-end against the real curated layer (proves it runs on live data)
# ---------------------------------------------------------------------------
def test_end_to_end_partition_on_curated_layer():
import backend.main as m
cf = m._load_curated_facts()
kinds = {"band": 0, "tiers": 0, "none": 0}
for pid, data in cf.items():
if pid != data.get("policy_id", pid):
continue
sio = data.get("sum_insured_options")
if not (isinstance(sio, list) and sio):
continue
view = rationalise(sio, m._si_source_quote(data))
kinds[view.kind if view.kind in ("band", "none") else "tiers"] += 1
# Every curated entry with an SI list is partitioned into exactly one
# bucket; the corroboration filter must keep real bands non-empty.
assert kinds["band"] >= 1
assert kinds["tiers"] >= 1
assert sum(kinds.values()) >= 60