"""User profile state + slot-order hint for the fact-find.

Fact-find phrasing lives in `backend/single_brain.py` (the single-LLM-call
brain); this module provides:

  - `Profile`              — accumulated user state (imported widely)
  - `record_answer`        — slot-write helper used by session_state
  - INR / budget / income parsers — used by the brain tool layer
  - `is_field_set`         — local helper for next_question (None / empty = unset)
  - `next_question`        — returns the field NAME (str) of the next
                             missing slot in canonical order. Used by
                             `/api/profile/completeness`'s `next_question_hint`.

Public API:
  - Profile dataclass
  - next_question(profile) -> str | None   (field name, None = complete)
  - record_answer(profile, question_id, raw_answer) -> Profile
"""

from __future__ import annotations

import re
from dataclasses import dataclass, field
from typing import Any, Optional


@dataclass
class Profile:
    """User profile accumulated during fact-find.

    Every slot starts out as None, an empty list, or False, so a bare
    `Profile()` means "nothing known yet". Instances are mutable:
    `record_answer` writes slots in place with `setattr`.
    """
    name: Optional[str] = None  # KI-040 — humanise + key for cross-session lookup
    age: Optional[int] = None
    dependents: Optional[str] = None  # "self", "self+spouse", "self+spouse+kids", "self+parents", etc.
    income_band: Optional[str] = None  # "under_5L", "5L-10L", "10L-25L", "25L+"
    existing_cover_inr: Optional[int] = None  # 0 means none
    primary_goal: Optional[str] = None  # "first_buy", "upgrade", "compare_specific", "tax_planning"
    location_tier: Optional[str] = None  # "metro", "tier1", "tier2", "tier3"
    parents_to_insure: Optional[bool] = None
    parents_age_max: Optional[int] = None  # if parents_to_insure
    parents_has_ped: Optional[bool] = None  # if parents_to_insure
    budget_band: Optional[str] = None  # "under_15k", "15k_30k", "30k_60k", "60k+"
    budget_inr: Optional[int] = None  # #64 — EXACT ₹/yr the user stated/slid;
    # preserved losslessly so the UI shows what they said (₹15,000), not a
    # 4-band representative (₹12k). budget_band is still derived for pricing.
    desired_sum_insured_inr: Optional[int] = None  # SOFT pricing input (post-recap)
    # Defaults to an empty list (not None) even though the annotation is Optional.
    health_conditions: Optional[list[str]] = field(default_factory=list)  # ["diabetes", "hypertension", ...]
    # D2 (2026-05-15) — co-pay tolerance + family medical history. Coupled
    # SLOT_UNION additions captured via RULE 2.5 post-recap, both flow into
    # premium_calculator (copay discount + family-history loading) and
    # retrieval (family-history rider boost keywords).
    copay_pct: Optional[int] = None  # 0-50, % of every claim user accepts
    family_medical_history: list[str] = field(default_factory=list)  # blood-family conditions
    smoker: Optional[bool] = None  # KI-275 — tobacco use, +30-50% premium loading
    asked: list[str] = field(default_factory=list)  # question IDs / field names already asked
    free_form_session: bool = False  # True = user asks free questions, not driven by us
    # KI-063 (2026-05-15) — per-user policy interaction log so the bot
    # remembers which policies were shown / selected / rejected across
    # sessions. Each entry is a dict with shape:
    #   {policy_slug, insurer, event_at (ISO Z), session_id, reason}
    # Dedup at write-time on (policy_slug, event_type) — re-events just
    # bump event_at + session_id rather than appending duplicates.
    shown_policies: list[dict] = field(default_factory=list)     # KI-063
    selected_policies: list[dict] = field(default_factory=list)  # KI-063
    rejected_policies: list[dict] = field(default_factory=list)  # KI-063


# ----------------------------------------------------------------------------
# Free-text INR amount parser for budget + income. Bare digits ("30000"),
# "30 thousand", "30 grand", "₹30,000", "1 lakh", "1.5L" all map cleanly
# to a rupee amount.
# ----------------------------------------------------------------------------

def _parse_inr_amount(text: str) -> Optional[int]:
    """Extract an INR amount in rupees from free text.

    Handles:
      - "30000", "30,000", "₹30,000", "Rs 30000", "rs. 30000"
      - "30k", "30 k", "30K"
      - "30 thousand", "30 grand"
      - "1 lakh", "1.5 lakh", "1L", "1.5L", "1 lac"
      - "1 crore", "1cr"
      - strips fluff: "maximum 30000", "I can pay 30000", "around 25000"
      - tolerates per-year qualifiers: "/year", "per year", "p.a."

    Rejects bare digits below ₹1000 (no plausible annual health insurance
    budget/income falls there) and rejects any text whose only number is
    in an age context ("29 years old", "age 29", "I am 29"), so an age
    answer is never misread as a rupee amount.

    Returns the integer rupee amount, or None if no number is recognisable
    or if the only numbers in the text are clearly not currency.
    """
    if not text:
        return None
    s = str(text).lower().strip()
    # Strip currency symbols + thousands separators so "₹30,000" parses.
    s = s.replace("₹", " ").replace("rs.", " ").replace("rs", " ")
    s = s.replace(",", "")
    # Crore (highest unit first so longer alternation wins).
    m = re.search(r"(\d+(?:\.\d+)?)\s*(?:cr|crore|crores)\b", s)
    if m:
        try:
            return int(float(m.group(1)) * 10_000_000)
        except ValueError:
            return None
    # Lakh / lac.
    m = re.search(r"(\d+(?:\.\d+)?)\s*(?:l(?:akh|ac)?s?)\b", s)
    if m:
        try:
            return int(float(m.group(1)) * 100_000)
        except ValueError:
            return None
    # Thousand / grand / k.
    m = re.search(r"(\d+(?:\.\d+)?)\s*(?:thousand|grand|k)\b", s)
    if m:
        try:
            return int(float(m.group(1)) * 1_000)
        except ValueError:
            return None
    # KI-161 — bare-digit fallback now guarded against age contexts.
    # If the text is clearly about age, refuse to interpret any number
    # as a currency amount.
    if re.search(
        r"\b(?:year|years|yr|yrs|y\s*o)\s*(?:old)?\b|\bage\b|\bi\s*am\s+\d{1,3}\b",
        s,
    ):
        return None
    # Bare digit run — pick the largest number-like token (handles
    # "maximum 30000", "around 25000", "I can pay 30000"). Magnitude
    # floor of ₹1000 — anything smaller is implausible for an annual
    # health-insurance budget or income.
    nums = re.findall(r"\d+(?:\.\d+)?", s)
    if nums:
        try:
            amt = int(float(max(nums, key=lambda x: float(x))))
        except ValueError:
            return None
        if amt < 1_000:
            return None
        return amt
    return None


def _parse_budget_band(text: str) -> Optional[str]:
    """Map free-text budget text → one of under_15k / 15k_30k / 30k_60k / 60k+.

    KI-149 (2026-05-15). Falls back to range hints ("15-30k", "30 to 60k")
    before delegating to `_parse_inr_amount` for a single number.
    """
    if not text:
        return None
    s = str(text).lower()
    # Explicit bucket hints first — order matters (more specific wins).
    if re.search(r"60\s*k\s*\+|>\s*60|more\s+than\s+60|above\s+60|over\s+60", s):
        return "60k+"
    if re.search(r"30\s*[-to]+\s*60\s*k?|30k\s*[-_]\s*60k|30\s*to\s*60", s):
        return "30k_60k"
    if re.search(r"15\s*[-to]+\s*30\s*k?|15k\s*[-_]\s*30k|15\s*to\s*30", s):
        return "15k_30k"
    if re.search(r"under\s*15|less\s+than\s+15|below\s+15|<\s*15", s):
        return "under_15k"
    # Single amount → bucket.
    amt = _parse_inr_amount(s)
    if amt is None:
        return None
    if amt < 15_000:
        return "under_15k"
    if amt < 30_000:
        return "15k_30k"
    if amt < 60_000:
        return "30k_60k"
    return "60k+"


def _parse_income_band(text: str) -> Optional[str]:
    """Map free-text income text → one of under_5L / 5L-10L / 10L-25L / 25L+.

    KI-149 (2026-05-15). Same approach as `_parse_budget_band`: explicit
    bucket hints first, then a single rupee amount → bucket.
    """
    if not text:
        return None
    s = str(text).lower()
    if re.search(r"25\s*l\s*\+|>\s*25|more\s+than\s+25|above\s+25|over\s+25", s):
        return "25L+"
    if re.search(r"10\s*[-to]+\s*25\s*l?|10l\s*[-_]\s*25l|10\s*to\s*25", s):
        return "10L-25L"
    if re.search(r"5\s*[-to]+\s*10\s*l?|5l\s*[-_]\s*10l|5\s*to\s*10", s):
        return "5L-10L"
    if re.search(r"under\s*5|less\s+than\s+5|below\s+5|<\s*5", s):
        return "under_5L"
    amt = _parse_inr_amount(s)
    if amt is None:
        return None
    # Income is parsed in rupees; 5 lakh = 500_000.
    if amt < 500_000:
        return "under_5L"
    if amt < 1_000_000:
        return "5L-10L"
    if amt < 2_500_000:
        return "10L-25L"
    return "25L+"


# ----------------------------------------------------------------------------
# Engine
# ----------------------------------------------------------------------------

# Canonical slot order for the fact-find hint API. The actual question
# phrasing lives in `single_brain.py`; this list only encodes which
# Profile attribute to fill next when nothing else is driving the
# conversation. `next_question` walks it top to bottom and returns the
# first entry that `is_field_set` reports as unset, so list position is
# ask priority. Every entry must be a Profile attribute name.
_SLOT_ORDER: list[str] = [
    "name",
    "age",
    "dependents",
    "location_tier",
    "income_band",
    "primary_goal",
    "existing_cover_inr",
    "budget_band",
    "health_conditions",
]


def is_field_set(profile: Profile, field_name: str) -> bool:
    """Report whether `field_name` holds a usable value on `profile`.

    A slot counts as unset when the attribute is missing, is None, or is
    an empty string / empty list. Falsy scalars such as 0 or False DO
    count as set (e.g. `existing_cover_inr == 0`).

    Note: an empty list is indistinguishable from "never answered", so a
    list slot that was answered with no items still reads as unset.
    """
    value = getattr(profile, field_name, None)
    if isinstance(value, (list, str)):
        return len(value) > 0
    return value is not None


def next_question(profile: Profile) -> Optional[str]:
    """Return the field NAME of the next missing slot, or None if complete.

    Slots are checked in `_SLOT_ORDER`; the first one `is_field_set`
    reports as unset is returned. The caller in
    `backend/main.py:/api/profile/completeness` uses the result only as a
    hint for the frontend — question phrasing is produced by
    `single_brain.py`.

    A free-form session (the user is driving with their own questions)
    always yields None, so the hint endpoint reports "nothing to ask".
    """
    if profile.free_form_session:
        return None
    missing_slots = (
        slot for slot in _SLOT_ORDER if not is_field_set(profile, slot)
    )
    return next(missing_slots, None)


# Question-id → Profile field-name aliases. Some callers (notably
# `session_state.record_answer` driven by `awaiting_question_id`) pass a
# question ID rather than a Profile attribute name; this maps them.
# `record_answer` looks the incoming id up here and falls back to the id
# itself, so ids that already equal a Profile attribute need no entry.
_QID_TO_FIELD: dict[str, str] = {
    "existing_cover": "existing_cover_inr",
    "location": "location_tier",
    "parents_age": "parents_age_max",
    "budget": "budget_band",
}


def record_answer(profile: Profile, question_id: str, raw_answer: Any) -> Profile:
    """Write one raw answer into its profile slot, mutating `profile` in place.

    `question_id` may be a Profile attribute name (preferred) or one of
    the question IDs aliased in `_QID_TO_FIELD`. If `_PARSERS` has a
    parser for the resolved slot, the raw answer is run through it; a
    parser that raises is treated the same as one that returns None.

    Nothing is written — and the slot is not marked as asked — when the
    slot does not exist on the profile or the resulting value is None or
    an empty string.

    Returns the same `profile` object for call chaining.
    """
    slot = _QID_TO_FIELD.get(question_id, question_id)
    if not hasattr(profile, slot):
        return profile

    parse = _PARSERS.get(slot)
    if parse is None:
        parsed = raw_answer
    else:
        try:
            parsed = parse(raw_answer)
        except Exception:
            parsed = None

    accepted = parsed is not None and parsed != ""
    if accepted:
        setattr(profile, slot, parsed)
        # KI-095 — mark the slot asked only after the write, so a parse
        # failure never leaves it asked-but-empty.
        if question_id not in profile.asked:
            profile.asked.append(question_id)
    return profile


def _parse_age(s: Any) -> Optional[int]:
    digits = "".join(c for c in str(s) if c.isdigit())[:3]
    if not digits:
        return None
    try:
        return int(digits) or None
    except ValueError:
        return None


def _parse_existing_cover(s: Any) -> Optional[int]:
    text = str(s).lower()
    if any(k in text for k in ("no", "none", "nothing", "zero", "not")):
        return 0
    digits = "".join(c for c in text if c.isdigit())[:6]
    if not digits:
        return None
    try:
        amt = int(digits)
    except ValueError:
        return None
    if any(k in text for k in ("l", "lakh", "lac")):
        amt *= 100_000
    return amt or None


# Parser dispatch by Profile field name. Slots not listed accept the raw value.
# Keys are resolved field names (after `_QID_TO_FIELD` aliasing); values are
# one-argument callables returning the parsed value or None. `record_answer`
# looks this table up at call time, which is why it can be defined below it.
# NOTE(review): `parents_age_max` has no parser here although the
# "parents_age" question id maps to it, so its answers are stored unparsed —
# confirm that is intended.
_PARSERS: dict[str, Any] = {
    "age": _parse_age,
    "income_band": _parse_income_band,
    "existing_cover_inr": _parse_existing_cover,
    "budget_band": _parse_budget_band,
}