Spaces:

rohitsar567
/

InsuranceBot

Sleeping

InsuranceBot / tests /test_premium_attribute_and_normalization.py

Claude Code

fix(pricing+scoring): rebuild premium engine to 0 logical violations + classification, faithfulness, links, writing

f266901 about 2 months ago

4.82 kB

	"""Guards for the premium correctness work (#38 + #36-B + #37b).

	Pins the bugs the user surfaced 2026-05-18:
	- canonical-match: doctype-suffixed ids reach their real sample
	- sample sanity guard: a bad curated sample (SBI Arogya Supreme
	brochure-extract, ~₹10k/L) can NEVER emit an absurd premium
	- sample normalization (#38): floater-priced samples are NOT
	double-counted by the floater multiplier
	- attribute model (#36-B): quote-less policies are differentiated by
	product TYPE — a top-up is not priced like a comprehensive plan
	- provenance label (#37b): sample-anchored vs modelled is explicit
	- REGRESSION: the sample-anchored policies that were already sane
	must stay sane (no swing) when none of the above misfires.
	"""

	from __future__ import annotations

	import sys
	from pathlib import Path

	# Repository root: two directory levels above this test file.
	_REPO = Path(__file__).resolve().parent.parent
	# Put the root at the front of sys.path (once only) so the ``backend``
	# package import below resolves when this file is run directly.
	if sys.path.count(str(_REPO)) == 0:
	    sys.path.insert(0, str(_REPO))

	from backend.premium_calculator import ( # noqa: E402
	_attribute_base_factor,
	estimate,
	)

	# Baseline customer profile shared by every test; individual tests override
	# single fields through ``_pt(..., field=value)``.
	PROFILE = {
	    "age": 34,
	    "sum_insured_inr": 1_200_000,
	    "city_tier": "metro",
	    "smoker": False,
	    "family_size": 3,
	    "pre_existing_conditions": "none",
	    "copayment_pct": 0.0,
	}


	def _pt(pid, **over):
	    """Run ``estimate`` for ``pid`` using ``PROFILE`` plus per-call overrides.

	    Args:
	        pid: Policy id, forwarded to ``estimate`` as ``policy_id``.
	        **over: Profile fields that replace the matching ``PROFILE``
	            entries for this call only.

	    Returns:
	        Whatever ``estimate`` returns for the merged profile.
	    """
	    # Merge into a fresh dict before unpacking: ``PROFILE`` stays untouched,
	    # and an overridden key (e.g. ``sum_insured_inr``) is passed exactly once
	    # instead of raising "multiple values for keyword argument".
	    return estimate(policy_id=pid, **{**PROFILE, **over})


	# --- attribute base factor: type-differentiated, comprehensive == 1.0 ----

	def test_attribute_factor_by_type():
	    """Pin the attribute base factor returned for each product-type id.

	    These are the post-rebuild factors from the real-fact-aware
	    ``_policy_product_type`` classifier; the synthetic ids used here fall
	    back to id keywords. A comprehensive plan, and a missing id (``None``),
	    both map to the neutral factor 1.0.
	    """
	    expected_factor_by_id = [
	        ("x-insurer__no-such-comprehensive-plan", 1.0),
	        ("acko__x-super-top-up__wordings", 0.32),
	        ("x__hospital-cash", 0.30),
	        ("aditya-birla__activ-secure-cancer-secure", 0.50),
	        ("acko__arogya-sanjeevani", 0.70),
	        (None, 1.0),
	    ]
	    for policy_id, expected in expected_factor_by_id:
	        assert _attribute_base_factor(policy_id) == expected


	# --- #38/#44: SBI bad data REPLACED by real harvested samples ----------

	def test_sbi_now_real_anchored_and_sane():
	    """SBI Arogya Supreme must be sample-anchored and inside a sane band.

	    The bad brochure-extract was physically replaced by two real official
	    SBI rate-chart figures (UIN SBIHLIP21043V012122) and unquarantined, so
	    the estimate must never return to the absurd ₹146,800.
	    """
	    quote = _pt("sbi-general__arogya-supreme__brochure")
	    assert quote.base_sample_used is not None, "SBI should now use its real harvested sample"
	    premium = quote.point_estimate_inr
	    assert 3_000 < premium < 60_000, (
	        f"SBI out of sane band: ₹{premium:,}"
	    )


	# --- #38 regression: real-sample policies stay sample-anchored & sane ---

	def test_sample_anchored_policies_not_regressed():
	    """Policies that already had a real sample stay anchored and in band.

	    Each entry maps a policy id to the exclusive (low, high) INR band its
	    point estimate must fall inside for the shared ``PROFILE``.
	    """
	    sane_bands = {
	        "icici-lombard__elevate__brochure": (8_000, 60_000),
	        "hdfc-ergo__optima-secure__wordings": (6_000, 50_000),
	        "aditya-birla__group-activ-health__wordings": (6_000, 50_000),
	    }
	    for pid, (floor, ceiling) in sane_bands.items():
	        quote = _pt(pid)
	        assert quote.base_sample_used is not None, f"{pid} lost its real sample"
	        assert floor < quote.point_estimate_inr < ceiling, (
	            f"{pid} swung out of sane band: ₹{quote.point_estimate_inr:,}"
	        )


	def test_legit_topup_sample_preserved_cheap():
	    """A top-up with a real sample stays sample-anchored and cheap.

	    Prices the Royal Sundaram advanced top-up with the sum insured
	    overridden to 4,500,000 and requires the estimate to stay under ₹15,000.
	    """
	    quote = _pt(
	        "royal-sundaram__advanced-top-up__brochure",
	        sum_insured_inr=4_500_000,
	    )
	    assert quote.base_sample_used is not None
	    premium = quote.point_estimate_inr
	    assert premium < 15_000, (
	        f"legit top-up sample broken: ₹{premium:,}"
	    )


	# --- #36-B: quote-less policies of different TYPE must NOT collide ------

	def test_quoteless_types_do_not_collide():
	    """Different product types must not collapse to one price.

	    Compares a comprehensive plan against a super top-up and a cancer plan
	    for the same profile: none may equal the comprehensive price, and the
	    top-up must be strictly cheaper.
	    """
	    # NOTE(review): another test in this file states that
	    # royal-sundaram__family-plus now has a real harvested sample, so the
	    # "comprehensive" leg here may no longer be quote-less — confirm that
	    # this id is still the intended one.
	    premium_by_kind = {
	        kind: _pt(pid).point_estimate_inr
	        for kind, pid in (
	            ("comprehensive", "royal-sundaram__family-plus__cis"),
	            ("topup", "acko__acko-health-iii-platinum-super-top-up__wordings"),
	            ("cancer", "aditya-birla__activ-secure-cancer-secure__brochure"),
	        )
	    }
	    baseline = premium_by_kind["comprehensive"]
	    assert baseline != premium_by_kind["topup"], "top-up priced same as comprehensive (collision)"
	    assert baseline != premium_by_kind["cancer"], "cancer plan priced same as comprehensive"
	    assert premium_by_kind["topup"] < baseline, "top-up must be materially cheaper"


	# --- #37b: provenance label is explicit and correct --------------------

	def test_provenance_label_distinguishes_sample_vs_model():
	    """The methodology text must say whether a price is anchored or modelled."""
	    # Sample-anchored path: a policy that has a collected public quote.
	    anchored = _pt("icici-lombard__elevate__brochure")
	    assert anchored.base_sample_used is not None
	    assert "public quote we collected" in anchored.methodology

	    # Model path: no curated sample AND no extraction. A synthetic id is
	    # used because it can never resolve to a sample
	    # (royal-sundaram__family-plus now HAS a real harvested sample).
	    modelled = _pt("nonexistent-insurer__no-such-plan-zzz__wordings")
	    assert modelled.base_sample_used is None
	    label = modelled.methodology
	    assert "Modelled" in label and "not a quote" in label


	if __name__ == "__main__":
	    # Allow running this file directly: hand it to pytest in verbose mode
	    # and exit with pytest's return code.
	    import pytest

	    sys.exit(pytest.main([__file__, "-v"]))