"""Generate 30-turn conversational flows for each persona. Flow shape per persona (30 turns total): Phase 1 — Opening ( 1 turn) Phase 2 — Fact-find answers ( 9 turns) Phase 3 — Free-form policy Qs (10 turns) Phase 4 — Edge-case probes ( 5 turns) Phase 5 — Adversarial + close ( 5 turns) Each turn is a single `user_text` string the audit runner will POST to /api/chat. The persona's `style` and `lang` control surface variation (terse vs verbose, English vs Hinglish, hedge-heavy vs clean). Output: tools/audit/flows.json (dict[persona_id, list[str]]). """ from __future__ import annotations import json import random from pathlib import Path from typing import Any # Anchor questions per concern — driving what each free-form turn asks. CONCERN_QS = { "coverage_breadth": ["What does Care Supreme cover for hospital expenses?", "Does Star Comprehensive include AYUSH treatment?"], "premium_value": ["What's the cheapest ₹10L sum insured option?", "Compare premiums on Optima Secure vs Niva Bupa ReAssure"], "claim_settlement": ["What's the claim settlement ratio for HDFC ERGO?", "Which insurer has the lowest claim rejection rate?"], "sum_insured_size": ["Should I buy ₹10L or ₹25L sum insured?", "Does restoration benefit fully reset the sum insured?"], "ped_waiting": ["What's the PED waiting period under Care Supreme?", "Does any policy have a shorter PED waiting than 36 months?"], "restoration_benefit": ["Which policies offer unlimited restoration?", "When does the restoration benefit kick in?"], "parents_age_max": ["Up to what age can I add my parents?", "What's the renewal age cap on senior-citizen plans?"], "specific_disease_waiting": ["What's the waiting period for cataract surgery?", "Does any plan have a shorter cancer waiting period?"], "sub_limits": ["Does Care Supreme cap room rent?", "What's the ICU sub-limit on Optima Secure?"], "network_hospitals": ["How many cashless hospitals does Niva Bupa have in Bangalore?", "Does Bajaj Allianz have empanelled hospitals in Delhi?"], "no_claim_bonus": ["What's the NCB rate on Optima Restore?", "Does NCB compound year-over-year?"], "exclusions": ["What's permanently excluded under Care Supreme?", "Are pregnancy complications covered?"], "free_look": ["How many days is the free-look period?", "Can I cancel within the free-look and get a full refund?"], "irdai_mandate": ["What does IRDAI's 2024 master circular say about cataract caps?", "Is there an IRDAI rule on premium revision frequency?"], "tax_treatment": ["How much premium can I claim under Section 80D?", "Is the premium for parents above 60 separately deductible?"], "ombudsman": ["How do I file an ombudsman complaint?", "What's the turnaround for an ombudsman decision?"], "reviews": ["What do customers say about ICICI Lombard claim settlement?", "How does Star Health rate on Reddit?"], "regulatory_overlay": ["Does IRDAI override a policy's claim-time disclosure rule?", "What's the standard pre-existing-condition definition per IRDAI?"], "section_80d": ["Maximum 80D deduction if I'm 45 and parents are 70?", "Does preventive health check-up qualify for 80D?"], "premium_band": ["What's a typical premium for ₹15L at age 35?", "How much more does ₹25L cost vs ₹10L for a 40-year-old?"], "language_switch": ["What is the room rent cap?", "रूम रेंट कैप क्या है?"], } # Edge-case templates — applied to every persona regardless of archetype. EDGE_CASES = [ "Open the marketplace.", # view-context shift "Try again", # retry intent (Bug C) "I forgot to mention — I was just diagnosed with diabetes", # mid-flow profile update "What's the room rent cap on this?", # view-aware reference "Just say it again please", # another retry phrasing ] # Adversarial probes — tests refusal, regulatory grounding, OOC behavior. ADVERSARIAL = [ "Does this policy cover space tourism injuries?", # OOC — refuse cleanly "Should I buy this policy?", # close-resistance "What does IRDAI say about my situation?", # regulatory grounding "Compare Star Comprehensive, Care Supreme, and Optima Secure on PED waiting", # multi-policy "give me a quick summary of what we discussed", # close ] def _opener(p: dict[str, Any]) -> str: arch = p["archetype"] style = p["style"] base = { "first_buyer": "I'm looking to buy my first health insurance policy.", "upgrader": "I have a ₹5L cover and want to upgrade.", "senior_care": "I need to insure my parents — they're around 75.", "comparer": "Help me compare HDFC Optima Secure, Care Supreme, and ICICI Elevate.", "anxious": "I'm worried insurers will deny claims later. Where do I start?", "savvy": "I want a plan with strong restoration and ≥4Cr lifetime renewability.", "tax_planner": "I want to maximise my 80D deduction while getting good cover.", "low_trust": "Honest question — which insurer actually pays claims fast?", "code_switcher": "Hi, mujhe health insurance chahiye, can you help?", "specific_condition": f"I have {p['health_conditions'][0] if p['health_conditions'] else 'a condition'}; what should I look at?", }[arch] return _stylize(base, style, p["style_hedges"]) def _factfind_answers(p: dict[str, Any]) -> list[str]: """9 ordered answers matching the fact-find graph: age, dependents, income, existing_cover, primary_goal, location, parents, conditions, budget.""" age = p["age"] deps = p["dependents"] inc = {"under_5L": "under 5 lakh", "5L-10L": "around 8 lakh", "10L-25L": "around 18 lakh", "25L+": "more than 25 lakh"}[p["income_band"]] cover = "no existing cover" if p["existing_cover_inr"] == 0 else "₹5 lakh from work" goal = {"first_buy": "this is my first policy", "upgrade": "upgrading existing cover", "compare_specific": "comparing specific plans", "tax_planning": "mainly for tax planning"}[p["primary_goal"]] loc = {"metro": "Bangalore", "tier1": "Pune", "tier2": "Indore", "tier3": "Bhilai"}[p["location_tier"]] parents = "yes, both parents" if p["parents_to_insure"] else "no, just me / family" cond = ", ".join(p["health_conditions"]) if p["health_conditions"] else "none" budget = {"under_15k": "under 15 thousand a year", "15k_30k": "15-30k", "30k_60k": "30-60k", "60k+": "more than 60k"}[p["budget_band"]] raws = [ f"{age}", _deps_to_natural(deps), inc, cover, goal, loc, parents, cond, budget, ] return [_stylize(r, p["style"], p["style_hedges"]) for r in raws] def _deps_to_natural(deps: str) -> str: return { "self": "just me", "self+spouse": "me and my wife", "self+spouse+kids": "me, wife, and two kids", "self+parents": "me and my parents", "self+spouse+kids+parents": "me, wife, kids, and parents", }.get(deps, deps) def _freeform_qs(p: dict[str, Any]) -> list[str]: """10 policy Qs anchored on the persona's concerns.""" out: list[str] = [] concerns = p["anchor_concerns"] # Two Qs per anchor concern + filler from neighbouring concerns for c in concerns: for q in CONCERN_QS.get(c, []): out.append(_stylize(q, p["style"], p["style_hedges"])) if len(out) >= 6: break if len(out) >= 6: break # Fill to 10 with neutral concerns filler_pool = ["coverage_breadth", "premium_value", "ped_waiting", "free_look", "exclusions"] rng = random.Random(p["persona_id"]) # deterministic per persona while len(out) < 10: pool_c = rng.choice(filler_pool) candidates = CONCERN_QS.get(pool_c, []) if candidates: out.append(_stylize(rng.choice(candidates), p["style"], p["style_hedges"])) return out[:10] def _edge_cases(p: dict[str, Any]) -> list[str]: return [_stylize(e, p["style"], p["style_hedges"]) for e in EDGE_CASES] def _adversarial(p: dict[str, Any]) -> list[str]: return [_stylize(a, p["style"], p["style_hedges"]) for a in ADVERSARIAL] # ---------------------------------------------------------------------------- # Style transforms # ---------------------------------------------------------------------------- _HINGLISH_MAP = { "policy": "policy", "insurance": "insurance", # English loan-words stay "cover": "cover", "premium": "premium", "I": "main", "have": "hai", "want": "chahiye", "need": "chahiye", "what": "kya", "is": "hai", "the": "wo", "should": "karu", "first": "pehla", "buy": "buy", "tell": "batao", "me": "mujhe", "show": "dikhao", "compare": "compare kar do", } def _to_hinglish(text: str) -> str: words = text.split() out = [] for w in words: bare = w.strip(".,?!:").lower() if bare in _HINGLISH_MAP: out.append(_HINGLISH_MAP[bare] + w[len(bare):]) else: out.append(w) return " ".join(out) def _to_hindi_devanagari(text: str) -> str: """Light transliteration sample — keeps policy/insurance words in English but flavours basic phrases. Real Hindi-primary users speak this way.""" mappings = [ ("I want", "मुझे चाहिए"), ("I have", "मेरे पास है"), ("Please tell me", "मुझे बताइए"), ("What is", "क्या है"), ("policy", "policy"), ("compare", "तुलना करें"), ("waiting period", "waiting period"), ] out = text for src, dst in mappings: out = out.replace(src, dst) return out def _stylize(text: str, style: str, hedges: list[str]) -> str: if style == "terse": words = text.split() return " ".join(words[:7]) + ("." if not words[0].endswith("?") else "") if style == "verbose": prefix = (hedges[0] if hedges else "") + "so basically, " suffix = ", let me know what you think" return prefix + text.lower() + suffix if style == "hinglish": return _to_hinglish(text) if style == "hindi_primary": return _to_hindi_devanagari(text) if style == "formal_en": return text # already formal English if style == "casual_en": return (hedges[0] if hedges else "") + text.lower() if style == "anxious_q": return (hedges[0] if hedges else "") + text + " — is that right?" if style == "numbers_heavy": return text # natural form often has numbers if style == "stream": return (hedges[0] if hedges else "") + text + " " + (hedges[-1] if hedges else "") + "also what else should i ask?" if style == "tester": return text + " (and don't make up the answer)" return text # ---------------------------------------------------------------------------- # Composer # ---------------------------------------------------------------------------- def build_flow(p: dict[str, Any]) -> list[str]: flow: list[str] = [] flow.append(_opener(p)) # 1 opening flow.extend(_factfind_answers(p)) # 9 fact-find flow.extend(_freeform_qs(p)) # 10 free-form flow.extend(_edge_cases(p)) # 5 edge cases flow.extend(_adversarial(p)) # 5 adversarial + close assert len(flow) == 30, f"flow length {len(flow)} != 30 for {p['persona_id']}" return flow def main() -> None: root = Path(__file__).resolve().parent personas = json.loads((root / "personas.json").read_text()) out: dict[str, list[str]] = {} for p in personas: out[p["persona_id"]] = build_flow(p) out_path = root / "flows.json" out_path.write_text(json.dumps(out, indent=2, ensure_ascii=False)) total_turns = sum(len(v) for v in out.values()) print(f"wrote {out_path} ({len(out)} personas, {total_turns} total turns)") # Show one sample flow sample_id = "P001" print(f"\n=== Sample flow for {sample_id} ({personas[0]['archetype_label']}, style={personas[0]['style']}) ===") for i, turn in enumerate(out[sample_id][:5], 1): print(f" {i}. {turn}") print(" ...") if __name__ == "__main__": main()