InsuranceBot / backend /needs_finder.py
rohitsar567's picture
fix(#64): preserve EXACT budget β‚Ή β€” slider shows what the user said
06c7cbc
Raw
History Blame Contribute Delete
12.5 kB
"""User profile state + slot-order hint for the fact-find.
Fact-find phrasing lives in `backend/single_brain.py` (the single-LLM-call
brain); this module provides:
- `Profile` β€” accumulated user state (imported widely)
- `record_answer` β€” slot-write helper used by session_state
- INR / budget / income parsers β€” used by the brain tool layer
- `is_field_set` β€” local helper for record_answer + next_question
- `next_question` β€” returns the field NAME (str) of the next
missing slot in canonical order. Used by
`/api/profile/completeness`'s `next_question_hint`.
Public API:
- Profile dataclass
- next_question(profile) -> str | None (field name, None = complete)
- record_answer(profile, field_name, raw_value) -> Profile
"""
from __future__ import annotations
import re
from dataclasses import dataclass, field
from typing import Any, Optional
@dataclass
class Profile:
    """Everything learned about the user over the course of the fact-find.

    Every slot starts out empty (``None`` or an empty list) and is filled in
    as answers arrive. Field order is part of the interface: the generated
    ``__init__`` accepts the fields positionally in declaration order.
    """

    # --- Core fact-find slots -------------------------------------------
    # KI-040: used to humanise replies and as the key for cross-session lookup.
    name: Optional[str] = None
    age: Optional[int] = None
    # e.g. "self", "self+spouse", "self+spouse+kids", "self+parents".
    dependents: Optional[str] = None
    # One of "under_5L", "5L-10L", "10L-25L", "25L+".
    income_band: Optional[str] = None
    # Rupees of cover already held; 0 means the user has none.
    existing_cover_inr: Optional[int] = None
    # One of "first_buy", "upgrade", "compare_specific", "tax_planning".
    primary_goal: Optional[str] = None
    # One of "metro", "tier1", "tier2", "tier3".
    location_tier: Optional[str] = None

    # --- Parents ----------------------------------------------------------
    parents_to_insure: Optional[bool] = None
    # The next two only apply when parents_to_insure is set.
    parents_age_max: Optional[int] = None
    parents_has_ped: Optional[bool] = None

    # --- Budget / pricing inputs -----------------------------------------
    # One of "under_15k", "15k_30k", "30k_60k", "60k+".
    budget_band: Optional[str] = None
    # #64: the EXACT rupees/year the user stated or slid to. Preserved
    # losslessly so the UI shows what they said (e.g. 15,000) rather than a
    # 4-band representative (e.g. 12k). budget_band is still derived for
    # pricing.
    budget_inr: Optional[int] = None
    # SOFT pricing input, captured post-recap.
    desired_sum_insured_inr: Optional[int] = None

    # --- Health -----------------------------------------------------------
    # e.g. ["diabetes", "hypertension", ...].
    health_conditions: Optional[list[str]] = field(default_factory=list)
    # D2 (2026-05-15): co-pay tolerance + family medical history. Coupled
    # SLOT_UNION additions captured via RULE 2.5 post-recap; both flow into
    # premium_calculator (copay discount + family-history loading) and
    # retrieval (family-history rider boost keywords).
    # 0-50: percentage of every claim the user accepts paying.
    copay_pct: Optional[int] = None
    # Conditions present in the user's blood family.
    family_medical_history: list[str] = field(default_factory=list)
    # KI-275: tobacco use, +30-50% premium loading.
    smoker: Optional[bool] = None

    # --- Conversation bookkeeping ----------------------------------------
    # Question IDs / field names that have already been asked.
    asked: list[str] = field(default_factory=list)
    # True when the user is asking free questions rather than being driven
    # through the fact-find.
    free_form_session: bool = False

    # --- Policy interaction log (KI-063, 2026-05-15) ---------------------
    # Per-user log so the bot remembers which policies were shown /
    # selected / rejected across sessions. Each entry is a dict shaped
    #   {policy_slug, insurer, event_at (ISO Z), session_id, reason}
    # Dedup happens at write-time on (policy_slug, event_type): a repeated
    # event bumps event_at + session_id instead of appending a duplicate.
    shown_policies: list[dict] = field(default_factory=list)
    selected_policies: list[dict] = field(default_factory=list)
    rejected_policies: list[dict] = field(default_factory=list)
# ----------------------------------------------------------------------------
# Free-text INR amount parser for budget + income. Bare digits ("30000"),
# "30 thousand", "30 grand", "₹30,000", "1 lakh", "1.5L" all map cleanly
# to a rupee amount.
# ----------------------------------------------------------------------------
def _parse_inr_amount(text: str) -> Optional[int]:
    """Extract an INR amount in rupees from free text.

    Handles:
    - "30000", "30,000", "₹30,000", "Rs 30000", "rs. 30000"
    - "30k", "30 k", "30K"
    - "30 thousand", "30 grand"
    - "1 lakh", "1.5 lakh", "1L", "1.5L", "1 lac"
    - "1 crore", "1cr"
    - strips fluff: "maximum 30000", "I can pay 30000", "around 25000"
    - tolerates per-year qualifiers: "/year", "per year", "a year", "p.a."

    Rejects bare digits below ₹1000 (no plausible annual health insurance
    budget/income falls there) and rejects bare-digit text that contains an
    age context ("29 year old", "age 29", "I am 29"), so an age answer is
    never misread as a rupee amount.

    Args:
        text: Free-form user text; non-string values are stringified.

    Returns:
        The integer rupee amount, or None if no number is recognisable, the
        number is not plausibly currency, or it is too large to represent.
    """
    if not text:
        return None
    s = str(text).lower().strip()
    # Strip currency markers + thousands separators so "₹30,000" parses.
    # "\u20b9" is the rupee sign, written as an escape so the literal cannot
    # be corrupted by an encoding round-trip of this file.
    # NOTE(review): the plain "rs" replace is a substring replace, so it also
    # rewrites "years" -> "yea " and "yrs" -> "y "; the `years` / `yrs`
    # alternatives of the age guard below therefore never match. Kept as-is
    # to preserve current behaviour; such answers are still rejected by the
    # magnitude floor when the only number is an age.
    s = s.replace("\u20b9", " ").replace("rs.", " ").replace("rs", " ")
    s = s.replace(",", "")

    # Unit-suffixed amounts, highest unit first (crore, then lakh/lac, then
    # thousand/grand/k). The first unit that matches wins.
    unit_patterns = (
        (r"(\d+(?:\.\d+)?)\s*(?:cr|crore|crores)\b", 10_000_000),
        (r"(\d+(?:\.\d+)?)\s*(?:l(?:akh|ac)?s?)\b", 100_000),
        (r"(\d+(?:\.\d+)?)\s*(?:thousand|grand|k)\b", 1_000),
    )
    for pattern, rupees_per_unit in unit_patterns:
        m = re.search(pattern, s)
        if m:
            try:
                # round(), not int(): binary float error must not shave a
                # rupee off the stated amount (2.3 * 100_000 evaluates to
                # 229999.99999999997).
                return round(float(m.group(1)) * rupees_per_unit)
            except (ValueError, OverflowError):
                return None

    # KI-161: the bare-digit fallback is guarded against age contexts. If
    # the text is clearly about age, refuse to interpret any number as a
    # currency amount. Per-year qualifiers ("/year", "per year", "a year")
    # are blanked out first so that "30000 per year" is not mistaken for an
    # age statement; "a year old" is left alone so it still trips the guard.
    guard_text = re.sub(
        r"(?:/\s*|\b(?:per|a|each|every)\s+)(?:year|yr)\b(?!\s*old)",
        " ",
        s,
    )
    if re.search(
        r"\b(?:year|years|yr|yrs|y\s*o)\s*(?:old)?\b|\bage\b|\bi\s*am\s+\d{1,3}\b",
        guard_text,
    ):
        return None

    # Bare digit run: pick the largest number-like token (handles
    # "maximum 30000", "around 25000", "I can pay 30000").
    nums = re.findall(r"\d+(?:\.\d+)?", s)
    if not nums:
        return None
    try:
        amt = int(float(max(nums, key=float)))
    except (ValueError, OverflowError):
        # OverflowError: an absurdly long digit run converts to float inf.
        return None
    # Magnitude floor of ₹1000: anything smaller is implausible for an
    # annual health-insurance budget or income.
    if amt < 1_000:
        return None
    return amt
def _parse_budget_band(text: str) -> Optional[str]:
    """Map free-text budget text -> one of under_15k / 15k_30k / 30k_60k / 60k+.

    KI-149 (2026-05-15). Tries explicit range / comparison hints first
    ("15-30k", "30 to 60k", "under 15k", "more than 60,000", and the
    canonical band ids themselves, e.g. "under_15k"), then delegates to
    `_parse_inr_amount` for a single number.

    The hint numbers are matched as whole numbers: "above 6000" or
    "under 150000" are NOT treated as "above 60" / "under 15" and instead
    fall through to the single-amount path.

    Args:
        text: Free-form user text; non-string values are stringified.

    Returns:
        The band id, or None when the text is empty or carries no
        recognisable amount.
    """
    if not text:
        return None
    s = str(text).lower()

    # Explicit bucket hints, checked in this order (higher bands first).
    # Building blocks used in every pattern:
    #   (?<![\d.])        the number must not be the tail of a longer one
    #   (?:\s*k|,?000)?   optional "k" / "000" on the first number of a range
    #   (?:,?000)?(?!\.?\d)  optional "000", then no further digits
    hints = (
        (
            "60k+",
            r"(?<![\d.])60\s*k\s*\+"
            r"|(?:>\s*|(?:more\s+than|above|over)\s+)60(?:,?000)?(?!\.?\d)",
        ),
        (
            "30k_60k",
            r"(?<![\d.])30(?:\s*k|,?000)?\s*(?:-|_|to)\s*60(?:,?000)?(?!\.?\d)",
        ),
        (
            "15k_30k",
            r"(?<![\d.])15(?:\s*k|,?000)?\s*(?:-|_|to)\s*30(?:,?000)?(?!\.?\d)",
        ),
        (
            "under_15k",
            r"(?:under[\s_]*|(?:less\s+than|below)\s+|<\s*)15(?:,?000)?(?!\.?\d)",
        ),
    )
    for band, pattern in hints:
        if re.search(pattern, s):
            return band

    # Single amount -> bucket. Each ceiling is exclusive, so exactly 15000
    # lands in "15k_30k".
    amt = _parse_inr_amount(s)
    if amt is None:
        return None
    for ceiling, band in (
        (15_000, "under_15k"),
        (30_000, "15k_30k"),
        (60_000, "30k_60k"),
    ):
        if amt < ceiling:
            return band
    return "60k+"
def _parse_income_band(text: str) -> Optional[str]:
    """Map free-text income text -> one of under_5L / 5L-10L / 10L-25L / 25L+.

    KI-149 (2026-05-15). Same approach as `_parse_budget_band`: explicit
    bucket hints first ("5-10", "10L-25L", "under 5 lakh", "above 25,00,000",
    and the canonical band ids themselves, e.g. "under_5L"), then a single
    rupee amount -> bucket via `_parse_inr_amount`.

    The hint numbers are matched as whole lakh figures: "under 50 lakh",
    "above 250000" or "more than 25k" are NOT treated as "under 5" /
    "above 25" and instead fall through to the single-amount path.

    Args:
        text: Free-form user text; non-string values are stringified.

    Returns:
        The band id, or None when the text is empty or carries no
        recognisable amount.
    """
    if not text:
        return None
    s = str(text).lower()

    # Explicit bucket hints, checked in this order (higher bands first).
    # Building blocks used in every pattern:
    #   (?<![\d.])                     number is not the tail of a longer one
    #   (?:\s*l(?:akh|ac)?s?|,?00,?000)?  optional lakh unit / "00000" on the
    #                                  first number of a range
    #   (?:,?00,?000)?                 optional "00000" (value given in rupees)
    #   (?!\.?\d|\s*(?:k|thousand)\b)  no further digits, and not a figure
    #                                  quoted in thousands
    hints = (
        (
            "25L+",
            r"(?<![\d.])25\s*l(?:akh|ac)?s?\s*\+"
            r"|(?:>\s*|(?:more\s+than|above|over)\s+)25(?:,?00,?000)?"
            r"(?!\.?\d|\s*(?:k|thousand)\b)",
        ),
        (
            "10L-25L",
            r"(?<![\d.])10(?:\s*l(?:akh|ac)?s?|,?00,?000)?\s*(?:-|_|to)\s*"
            r"25(?:,?00,?000)?(?!\.?\d|\s*(?:k|thousand)\b)",
        ),
        (
            "5L-10L",
            r"(?<![\d.])5(?:\s*l(?:akh|ac)?s?|,?00,?000)?\s*(?:-|_|to)\s*"
            r"10(?:,?00,?000)?(?!\.?\d|\s*(?:k|thousand)\b)",
        ),
        (
            "under_5L",
            r"(?:under[\s_]*|(?:less\s+than|below)\s+|<\s*)5(?:,?00,?000)?"
            r"(?!\.?\d|\s*(?:k|thousand)\b)",
        ),
    )
    for band, pattern in hints:
        if re.search(pattern, s):
            return band

    amt = _parse_inr_amount(s)
    if amt is None:
        return None
    # Income is parsed in rupees; 5 lakh = 500_000. Each ceiling is
    # exclusive, so exactly 5 lakh lands in "5L-10L".
    for ceiling, band in (
        (500_000, "under_5L"),
        (1_000_000, "5L-10L"),
        (2_500_000, "10L-25L"),
    ):
        if amt < ceiling:
            return band
    return "25L+"
# ----------------------------------------------------------------------------
# Engine
# ----------------------------------------------------------------------------
# Canonical slot order for the fact-find hint API. The actual question
# phrasing lives in `single_brain.py`; this list only encodes which
# Profile attribute to fill next when nothing else is driving the
# conversation. `next_question` walks it front to back and returns the
# first entry that `is_field_set` reports as unset, so list position is
# the asking priority. Every entry is a `Profile` attribute name.
_SLOT_ORDER: list[str] = [
    "name",
    "age",
    "dependents",
    "location_tier",
    "income_band",
    "primary_goal",
    "existing_cover_inr",
    "budget_band",
    "health_conditions",
]
def is_field_set(profile: Profile, field_name: str) -> bool:
    """Report whether `field_name` holds a real answer on `profile`.

    A slot counts as unset when the attribute is missing, is ``None``, or is
    an empty list / empty string. Falsy-but-meaningful values such as ``0``
    or ``False`` count as set.
    """
    current = getattr(profile, field_name, None)
    if current is None:
        return False
    # Only lists and strings have an "empty" state that means "no answer".
    is_blank_container = isinstance(current, (list, str)) and len(current) == 0
    return not is_blank_container
def next_question(profile: Profile) -> Optional[str]:
    """Name the next unanswered slot, or return None when there is none.

    `backend/main.py:/api/profile/completeness` calls this purely to hint
    to the frontend which slot to ask about next; the wording of the actual
    question comes from `single_brain.py`.

    Slots are checked in `_SLOT_ORDER` order and the first one that
    `is_field_set` reports as empty is returned. In a free-form session
    (the user is driving with their own questions) the result is always
    None, so the hint endpoint reports "nothing to ask".
    """
    if profile.free_form_session:
        return None
    unanswered = (
        slot for slot in _SLOT_ORDER if not is_field_set(profile, slot)
    )
    return next(unanswered, None)
# Question-id -> Profile field-name aliases. Some callers (notably
# `session_state.record_answer` driven by `awaiting_question_id`) pass a
# question ID rather than a Profile attribute name; this maps them.
# `record_answer` looks the incoming id up here and falls back to the id
# itself when there is no entry, so real attribute names pass through
# unchanged.
_QID_TO_FIELD: dict[str, str] = {
    "existing_cover": "existing_cover_inr",
    "location": "location_tier",
    "parents_age": "parents_age_max",
    "budget": "budget_band",
}
def record_answer(profile: Profile, question_id: str, raw_answer: Any) -> Profile:
    """Write one raw answer into its slot on `profile` (mutates in place).

    `question_id` may be a Profile attribute name (preferred) or one of the
    question IDs listed in `_QID_TO_FIELD`. If a parser is registered for
    the resolved slot in `_PARSERS`, the raw answer goes through it first;
    otherwise the raw value is stored untouched.

    Nothing is written when the slot does not exist on the profile, or when
    the (parsed) value is None or an empty string. A parser that raises is
    treated the same as a parser that returned None.

    Returns:
        The same `profile` object that was passed in.
    """
    slot = _QID_TO_FIELD.get(question_id, question_id)
    if not hasattr(profile, slot):
        return profile

    parse = _PARSERS.get(slot)
    if parse is None:
        parsed: Any = raw_answer
    else:
        try:
            parsed = parse(raw_answer)
        except Exception:
            # Best-effort: an unparseable answer leaves the slot untouched.
            parsed = None

    has_answer = parsed is not None and parsed != ""
    if has_answer:
        setattr(profile, slot, parsed)
        # KI-095: only mark the slot as asked once the write has succeeded,
        # so a parse failure doesn't leave the slot in an asked-but-empty
        # desync state. The id is recorded as the caller passed it.
        if question_id not in profile.asked:
            profile.asked.append(question_id)
    return profile
def _parse_age(s: Any) -> Optional[int]:
    """Pull an age in whole years out of a raw answer.

    Takes the first standalone run of one to three digits in the
    stringified input. Digits that belong to different numbers are never
    glued together, so ``"29.5"`` and ``29.0`` give 29 (not 295 / 290) and
    ``"I am 29, my wife is 27"`` gives 29 (not 292). Longer digit runs such
    as a birth year ("1990") are skipped rather than truncated.

    Args:
        s: Raw answer of any type; it is converted with ``str()``.

    Returns:
        The age as a positive int, or None when no such number is present
        or the number is zero.
    """
    match = re.search(r"(?<!\d)\d{1,3}(?!\d)", str(s))
    if match is None:
        return None
    # `or None`: an age of 0 is treated as "no answer".
    return int(match.group()) or None
def _parse_existing_cover(s: Any) -> Optional[int]:
    """Parse the user's existing health cover into rupees (0 = no cover).

    Resolution order:
    1. A whole-word negation ("no", "none", "nothing", "zero", "not")
       means the user has no cover -> 0. Whole words only, so "now" or
       "don't know" are not read as "no".
    2. A number with a unit: crore / cr, lakh / lac / L, thousand / k.
    3. Otherwise the largest bare number in the text, taken as rupees.

    Thousands separators are ignored ("5,00,000" -> 500000) and decimals
    are honoured ("5.5 lakh" -> 550000).

    Args:
        s: Raw answer of any type; None means "no answer".

    Returns:
        Rupees of existing cover (0 when the user has none), or None when
        the answer is missing or contains neither a negation nor a number.
    """
    if s is None:
        # Without this, str(None) == "None" would be read as the word
        # "none" and recorded as "no cover".
        return None
    text = str(s).lower().replace(",", "")

    if re.search(r"\b(?:no|none|nothing|zero|not)\b", text):
        return 0

    # Unit-suffixed amounts, highest unit first. The unit must directly
    # follow the number and end on a word boundary, so a stray "l" or "k"
    # inside another word (e.g. "total", "employer") is not a unit.
    unit_patterns = (
        (r"(\d+(?:\.\d+)?)\s*(?:cr|crores?)\b", 10_000_000),
        (r"(\d+(?:\.\d+)?)\s*(?:l(?:akh|ac)?s?)\b", 100_000),
        (r"(\d+(?:\.\d+)?)\s*(?:thousand|k)\b", 1_000),
    )
    for pattern, rupees_per_unit in unit_patterns:
        m = re.search(pattern, text)
        if m:
            try:
                # round() so binary float error cannot shave off a rupee.
                return round(float(m.group(1)) * rupees_per_unit)
            except OverflowError:
                return None

    # Bare number: already in rupees. A literal 0 is a valid answer and
    # means "no cover".
    nums = re.findall(r"\d+(?:\.\d+)?", text)
    if not nums:
        return None
    try:
        return int(float(max(nums, key=float)))
    except OverflowError:
        # An absurdly long digit run converts to float inf.
        return None
# Parser dispatch by Profile field name. Slots not listed accept the raw value.
# Keys are the resolved Profile attribute names (i.e. after the
# `_QID_TO_FIELD` alias lookup in `record_answer`), not question IDs.
# Each parser takes the raw answer and returns the value to store, or None
# when the answer cannot be interpreted.
_PARSERS: dict[str, Any] = {
    "age": _parse_age,
    "income_band": _parse_income_band,
    "existing_cover_inr": _parse_existing_cover,
    "budget_band": _parse_budget_band,
}