Spaces:

rohitsar567
/

InsuranceBot

Sleeping

App Files Files Community

InsuranceBot / backend /needs_finder.py

rohitsar567

fix(#64): preserve EXACT budget ₹ — slider shows what the user said

06c7cbc about 2 months ago

Raw

History Blame Contribute Delete

12.5 kB

	"""User profile state + slot-order hint for the fact-find.

	Fact-find phrasing lives in `backend/single_brain.py` (the single-LLM-call
	brain); this module provides:

	- `Profile` — accumulated user state (imported widely)
	- `record_answer` — slot-write helper used by session_state
	- INR / budget / income parsers — used by the brain tool layer
	- `is_field_set` — local helper for record_answer + next_question
	- `next_question` — returns the field NAME (str) of the next
	missing slot in canonical order. Used by
	`/api/profile/completeness`'s `next_question_hint`.

	Public API:
	- Profile dataclass
	- next_question(profile) -> str | None (field name, None = complete)
	- record_answer(profile, field_name, raw_value) -> Profile
	"""

	from __future__ import annotations

	import re
	from dataclasses import dataclass, field
	from typing import Any, Optional


	@dataclass
	class Profile:
	    """Slots collected about the user over the course of the fact-find.

	    Scalar slots default to ``None`` (``free_form_session`` to ``False``) and
	    list slots default to a fresh empty list, so a new ``Profile()`` starts
	    with nothing filled in.
	    """

	    # KI-040 — humanise + key for cross-session lookup.
	    name: Optional[str] = None
	    age: Optional[int] = None
	    # "self", "self+spouse", "self+spouse+kids", "self+parents", etc.
	    dependents: Optional[str] = None
	    # "under_5L", "5L-10L", "10L-25L", "25L+"
	    income_band: Optional[str] = None
	    # 0 means none.
	    existing_cover_inr: Optional[int] = None
	    # "first_buy", "upgrade", "compare_specific", "tax_planning"
	    primary_goal: Optional[str] = None
	    # "metro", "tier1", "tier2", "tier3"
	    location_tier: Optional[str] = None
	    parents_to_insure: Optional[bool] = None
	    # Only meaningful if parents_to_insure.
	    parents_age_max: Optional[int] = None
	    # Only meaningful if parents_to_insure.
	    parents_has_ped: Optional[bool] = None
	    # "under_15k", "15k_30k", "30k_60k", "60k+"
	    budget_band: Optional[str] = None
	    # #64 — EXACT ₹/yr the user stated/slid; preserved losslessly so the UI
	    # shows what they said (₹15,000), not a 4-band representative (₹12k).
	    # budget_band is still derived for pricing.
	    budget_inr: Optional[int] = None
	    # SOFT pricing input (post-recap).
	    desired_sum_insured_inr: Optional[int] = None
	    # ["diabetes", "hypertension", ...]
	    health_conditions: Optional[list[str]] = field(default_factory=list)
	    # D2 (2026-05-15) — co-pay tolerance + family medical history. Coupled
	    # SLOT_UNION additions captured via RULE 2.5 post-recap, both flow into
	    # premium_calculator (copay discount + family-history loading) and
	    # retrieval (family-history rider boost keywords).
	    # copay_pct: 0-50, % of every claim the user accepts.
	    copay_pct: Optional[int] = None
	    # Blood-family conditions.
	    family_medical_history: list[str] = field(default_factory=list)
	    # KI-275 — tobacco use, +30-50% premium loading.
	    smoker: Optional[bool] = None
	    # Question IDs / field names already asked.
	    asked: list[str] = field(default_factory=list)
	    # True = user asks free questions, not driven by us.
	    free_form_session: bool = False
	    # KI-063 (2026-05-15) — per-user policy interaction log so the bot
	    # remembers which policies were shown / selected / rejected across
	    # sessions. Each entry is a dict with shape:
	    #   {policy_slug, insurer, event_at (ISO Z), session_id, reason}
	    # Dedup at write-time on (policy_slug, event_type) — re-events just
	    # bump event_at + session_id rather than appending duplicates.
	    shown_policies: list[dict] = field(default_factory=list)
	    selected_policies: list[dict] = field(default_factory=list)
	    rejected_policies: list[dict] = field(default_factory=list)


	# ----------------------------------------------------------------------------
	# Free-text INR amount parser for budget + income. Bare digits ("30000"),
	# "30 thousand", "30 grand", "₹30,000", "1 lakh", "1.5L" all map cleanly
	# to a rupee amount.
	# ----------------------------------------------------------------------------

	# Currency markers removed before parsing. "rs" is only stripped when it
	# stands alone, so words such as "years" / "yrs" / "first" stay intact.
	_INR_CURRENCY_RE = re.compile(r"₹|(?<![a-z])rs\.?(?![a-z])")

	# (pattern, rupee multiplier), tried in order — largest unit first.
	_INR_UNIT_PATTERNS = (
	    (re.compile(r"(\d+(?:\.\d+)?)\s*(?:crores?|cr)\b"), 10_000_000),
	    (re.compile(r"(\d+(?:\.\d+)?)\s*l(?:akh|ac)?s?\b"), 100_000),
	    (re.compile(r"(\d+(?:\.\d+)?)\s*(?:thousand|grand|k)\b"), 1_000),
	)

	# KI-161 — phrases that mark the text as an age answer. The year/yr/yo
	# unit must directly follow a 1-3 digit number (or be followed by "old")
	# so per-year qualifiers like "30000 per year" are NOT treated as age.
	_INR_AGE_CONTEXT_RE = re.compile(
	    r"\b\d{1,3}\s*(?:years?|yrs?|y\s*o)\b"
	    r"|\b(?:years?|yrs?)\s*old\b"
	    r"|\bage\b"
	    r"|\bi\s*am\s+\d{1,3}\b"
	)


	def _parse_inr_amount(text: str) -> Optional[int]:
	    """Extract an INR amount in rupees from free text.

	    Handles:
	    - "30000", "30,000", "₹30,000", "Rs 30000", "rs. 30000"
	    - "30k", "30 k", "30K"
	    - "30 thousand", "30 grand"
	    - "1 lakh", "1.5 lakh", "1L", "1.5L", "1 lac"
	    - "1 crore", "1cr"
	    - strips fluff: "maximum 30000", "I can pay 30000", "around 25000"
	    - tolerates per-year qualifiers: "/year", "per year", "p.a."

	    A number carrying a unit suffix (crore, then lakh, then thousand/k)
	    wins over bare digits and is rounded to the nearest rupee. Without a
	    unit, the text is first checked for an age context ("29 years old",
	    "age 29", "I am 29"); if one is found nothing is interpreted as
	    currency. Otherwise the largest bare number is used, subject to a
	    ₹1000 floor (no plausible annual health-insurance budget/income falls
	    below it).

	    Returns the integer rupee amount, or None if no number is recognisable
	    or the numbers present are clearly not currency.
	    """
	    if not text:
	        return None
	    s = str(text).lower().strip()
	    # Strip currency symbols + thousands separators so "₹30,000" parses.
	    s = _INR_CURRENCY_RE.sub(" ", s).replace(",", "")
	    try:
	        for pattern, multiplier in _INR_UNIT_PATTERNS:
	            m = pattern.search(s)
	            if m:
	                # round(), not int(): 2.3 * 100_000 is 229999.99999999997.
	                return round(float(m.group(1)) * multiplier)
	        # KI-161 — bare-digit fallback is guarded against age contexts.
	        if _INR_AGE_CONTEXT_RE.search(s):
	            return None
	        # Bare digit run — pick the largest number-like token (handles
	        # "maximum 30000", "around 25000", "I can pay 30000").
	        nums = re.findall(r"\d+(?:\.\d+)?", s)
	        if not nums:
	            return None
	        amt = int(max(float(n) for n in nums))
	    except (OverflowError, ValueError):
	        # Absurdly long digit runs overflow float; treat as unparseable.
	        return None
	    return amt if amt >= 1_000 else None


	# (band, hint pattern), checked in order — more specific wins. Numbers are
	# digit-bounded so "over 6000" or "115-300" do not trip the 60 / 15-30
	# hints, and the canonical band ids themselves ("under_15k", "30k_60k",
	# ...) round-trip to the same band.
	_BUDGET_BAND_HINTS = (
	    ("60k+", re.compile(
	        r"(?<![\d.])60\s*k?\s*\+"
	        r"|(?:>\s*|\b(?:more\s+than|above|over)\s+)60(?!\.?\d)"
	    )),
	    ("30k_60k", re.compile(
	        r"(?<![\d.])30\s*k?\s*(?:[-–_]|to)\s*60(?!\.?\d)"
	    )),
	    ("15k_30k", re.compile(
	        r"(?<![\d.])15\s*k?\s*(?:[-–_]|to)\s*30(?!\.?\d)"
	    )),
	    ("under_15k", re.compile(
	        r"(?:<\s*|\b(?:under|below|less\s+than)[\s_]*)15(?!\.?\d)"
	    )),
	)


	def _parse_budget_band(text: str) -> Optional[str]:
	    """Map free-text budget text → one of under_15k / 15k_30k / 30k_60k / 60k+.

	    KI-149 (2026-05-15). Explicit range hints ("15-30k", "30 to 60k",
	    "more than 60", "under 15") are tried first; if none matches, the text
	    is handed to `_parse_inr_amount` and the single rupee amount is
	    bucketed.

	    Returns the band id, or None when the text is empty or no amount can
	    be recognised.
	    """
	    if not text:
	        return None
	    s = str(text).lower()
	    # Explicit bucket hints first — order matters (more specific wins).
	    for band, hint in _BUDGET_BAND_HINTS:
	        if hint.search(s):
	            return band
	    # Single amount → bucket (lower bound inclusive, upper bound exclusive).
	    amt = _parse_inr_amount(s)
	    if amt is None:
	        return None
	    if amt < 15_000:
	        return "under_15k"
	    if amt < 30_000:
	        return "15k_30k"
	    if amt < 60_000:
	        return "30k_60k"
	    return "60k+"


	# (band, hint pattern), checked in order — more specific wins. Numbers are
	# digit-bounded so "25 to 100" does not trip the "5 to 10" hint and
	# "under 50 lakh" does not trip "under 5"; the canonical band ids
	# ("under_5L", "5L-10L", ...) round-trip to the same band. An optional
	# lakh unit (l / lakh / lac, plural allowed) may follow the first number.
	_INCOME_BAND_HINTS = (
	    ("25L+", re.compile(
	        r"(?<![\d.])25\s*(?:l(?:akh|ac)?s?)?\s*\+"
	        r"|(?:>\s*|\b(?:more\s+than|above|over)\s+)25(?!\.?\d)"
	    )),
	    ("10L-25L", re.compile(
	        r"(?<![\d.])10\s*(?:l(?:akh|ac)?s?)?\s*(?:[-–_]|to)\s*25(?!\.?\d)"
	    )),
	    ("5L-10L", re.compile(
	        r"(?<![\d.])5\s*(?:l(?:akh|ac)?s?)?\s*(?:[-–_]|to)\s*10(?!\.?\d)"
	    )),
	    ("under_5L", re.compile(
	        r"(?:<\s*|\b(?:under|below|less\s+than)[\s_]*)5(?!\.?\d)"
	    )),
	)


	def _parse_income_band(text: str) -> Optional[str]:
	    """Map free-text income text → one of under_5L / 5L-10L / 10L-25L / 25L+.

	    KI-149 (2026-05-15). Same approach as `_parse_budget_band`: explicit
	    bucket hints first, then a single rupee amount (via
	    `_parse_inr_amount`) → bucket.

	    Returns the band id, or None when the text is empty or no amount can
	    be recognised.
	    """
	    if not text:
	        return None
	    s = str(text).lower()
	    for band, hint in _INCOME_BAND_HINTS:
	        if hint.search(s):
	            return band
	    amt = _parse_inr_amount(s)
	    if amt is None:
	        return None
	    # Income is parsed in rupees; 5 lakh = 500_000.
	    if amt < 500_000:
	        return "under_5L"
	    if amt < 1_000_000:
	        return "5L-10L"
	    if amt < 2_500_000:
	        return "10L-25L"
	    return "25L+"


	# ----------------------------------------------------------------------------
	# Engine
	# ----------------------------------------------------------------------------

	# Order in which unfilled Profile attributes are suggested by the
	# fact-find hint API. Only the attribute names live here — the wording of
	# each question is produced in `single_brain.py` — and the list is
	# consulted when nothing else is steering the conversation.
	_SLOT_ORDER: list[str] = [
	    "name", "age", "dependents",
	    "location_tier", "income_band", "primary_goal",
	    "existing_cover_inr", "budget_band", "health_conditions",
	]


	def is_field_set(profile: Profile, field_name: str) -> bool:
	    """Report whether ``field_name`` holds a usable value on ``profile``.

	    A missing attribute, ``None``, an empty string and an empty list all
	    count as "not set". Any other value — including ``0`` and ``False`` —
	    counts as set.
	    """
	    current = getattr(profile, field_name, None)
	    if current is None:
	        return False
	    is_empty_sequence = isinstance(current, (list, str)) and len(current) == 0
	    return not is_empty_sequence


	def next_question(profile: Profile) -> Optional[str]:
	    """Name the first slot in `_SLOT_ORDER` that is still unfilled.

	    Used by `backend/main.py:/api/profile/completeness` purely as a hint
	    to the frontend about which slot to ask next; the question wording
	    itself comes from `single_brain.py`.

	    Returns None when every slot in `_SLOT_ORDER` is set, and also when
	    the session is free-form (the user is driving with their own
	    questions), so the hint endpoint reports "nothing to ask".
	    """
	    if profile.free_form_session:
	        return None
	    unfilled = (slot for slot in _SLOT_ORDER if not is_field_set(profile, slot))
	    return next(unfilled, None)


	# Aliases from question ID to Profile attribute name. Some callers
	# (notably `session_state.record_answer`, driven by
	# `awaiting_question_id`) hand over a question ID instead of the
	# attribute name; anything not listed here is used as-is.
	_QID_TO_FIELD: dict[str, str] = dict(
	    existing_cover="existing_cover_inr",
	    location="location_tier",
	    parents_age="parents_age_max",
	    budget="budget_band",
	)


	def record_answer(profile: Profile, question_id: str, raw_answer: Any) -> Profile:
	    """Write one raw answer into its Profile slot, in place.

	    `question_id` is either a Profile attribute name (preferred) or one of
	    the aliases in `_QID_TO_FIELD`. If `_PARSERS` has a parser for the
	    resolved slot, the raw answer goes through it; a parser that raises
	    is treated the same as one that returns None.

	    Nothing is written when the slot does not exist on the profile or the
	    resulting value is None / "". The same `profile` object is returned in
	    every case.
	    """
	    slot = _QID_TO_FIELD.get(question_id, question_id)
	    if not hasattr(profile, slot):
	        return profile

	    convert = _PARSERS.get(slot)
	    if convert is None:
	        parsed: Any = raw_answer
	    else:
	        try:
	            parsed = convert(raw_answer)
	        except Exception:
	            # Best-effort: an unparseable answer simply leaves the slot empty.
	            parsed = None

	    if parsed is None or parsed == "":
	        return profile

	    setattr(profile, slot, parsed)
	    # KI-095 — the slot is marked asked only after the write above, so a
	    # parse failure never leaves it asked-but-empty.
	    if question_id not in profile.asked:
	        profile.asked.append(question_id)
	    return profile


	def _parse_age(s: Any) -> Optional[int]:
	    """Pull an age out of a raw answer.

	    Takes the first standalone run of 1-3 digits in ``str(s)`` — so
	    "29 years, 2 kids" yields 29 rather than gluing every digit together,
	    and "29.5" yields 29. Runs of four or more digits (e.g. a birth year)
	    and the fractional part of a decimal are skipped.

	    Returns the age as an int, or None when there is no such run or it
	    is zero.
	    """
	    match = re.search(r"(?<![\d.])\d{1,3}(?!\d)", str(s))
	    if match is None:
	        return None
	    return int(match.group()) or None


	# Whole-word negations meaning "no existing cover". Word boundaries keep
	# "know", "another", "note" etc. from being read as "no" / "not".
	_NO_COVER_RE = re.compile(r"\b(?:no|none|nothing|zero|not|nil)\b")

	# A number directly followed by a unit word; longer spellings come first
	# so "lakh" is not cut short at "l".
	_COVER_UNIT_RE = re.compile(
	    r"(\d+(?:\.\d+)?)\s*(crores?|cr|lakhs?|lacs?|l|thousand|k)\b"
	)

	# Rupee multiplier keyed by the first letter of the matched unit word.
	_COVER_UNIT_MULTIPLIER = {"c": 10_000_000, "l": 100_000, "t": 1_000, "k": 1_000}


	def _parse_existing_cover(s: Any) -> Optional[int]:
	    """Turn a raw "existing cover" answer into a rupee amount.

	    - ``None`` → None (a missing answer is not the word "none").
	    - A whole-word negation ("no", "none", "nothing", "zero", "not",
	      "nil") → 0, checked before any number is looked at.
	    - A number with a unit ("5 lakh", "5L", "1.5 lac", "1 crore", "50k")
	      → that number times the unit, rounded to the nearest rupee.
	    - Otherwise the largest bare number in the text, commas ignored
	      ("5,00,000" → 500000). A literal "0" → 0, matching "zero".

	    Returns None when the text holds no number and no negation.
	    """
	    if s is None:
	        return None
	    text = str(s).lower().replace(",", "")
	    if _NO_COVER_RE.search(text):
	        return 0
	    try:
	        with_unit = _COVER_UNIT_RE.search(text)
	        if with_unit:
	            multiplier = _COVER_UNIT_MULTIPLIER[with_unit.group(2)[0]]
	            return round(float(with_unit.group(1)) * multiplier)
	        nums = re.findall(r"\d+(?:\.\d+)?", text)
	        if not nums:
	            return None
	        return int(max(float(n) for n in nums))
	    except (OverflowError, ValueError):
	        # Absurdly long digit runs overflow float; treat as unparseable.
	        return None


	# Profile field name → parser applied to the raw answer before it is
	# stored. A slot with no entry here takes the raw value unchanged.
	_PARSERS: dict[str, Any] = dict(
	    age=_parse_age,
	    income_band=_parse_income_band,
	    existing_cover_inr=_parse_existing_cover,
	    budget_band=_parse_budget_band,
	)