Spaces:

rohitsar567
/

InsuranceBot

Sleeping

App Files Files Community

InsuranceBot / eval /generate_gold.py

rohitsar567

Deploy v1 — single-Docker FastAPI + Next.js + RAG + voice + faithfulness

5dbdf6e verified about 2 months ago

Raw

History Blame Contribute Delete

8.25 kB

	"""Generate gold Q&A pairs (Pipeline A — auto from structured extraction).

	For every successfully extracted policy in DuckDB, generate templated questions
	whose answers come directly from the structured fields. Output to eval/gold_qa.json.

	Run AFTER extraction:
	python -m eval.generate_gold
	"""

	from __future__ import annotations

	import json
	import re
	import time
	from pathlib import Path
	from typing import Any

	import duckdb

	from backend.config import settings

	ROOT = settings.CORPUS_DIR.parent.parent
	OUTPUT = ROOT / "eval" / "gold_qa.json"


	# Question templates per field. Each tuple: (question_format, answer_format, type, difficulty).
	# {pn} is replaced with policy_name, {v} with the field value.
	QUESTION_TEMPLATES: dict[str, list[tuple[str, str, str, str]]] = {
	"pre_existing_disease_waiting_months": [
	("What is the waiting period for pre-existing diseases under {pn}?", "{v} months from policy inception", "waiting_period", "easy"),
	("If I have diabetes, how long do I have to wait before I can claim under {pn}?", "{v} months — pre-existing diseases have a waiting period of {v} months from policy start", "waiting_period", "medium"),
	],
	"initial_waiting_period_days": [
	("What is the initial waiting period under {pn}?", "{v} days from policy inception", "waiting_period", "easy"),
	],
	"maternity_waiting_months": [
	("What's the maternity benefit waiting period under {pn}?", "{v} months", "waiting_period", "easy"),
	],
	"pre_hospitalization_days": [
	("How many days of pre-hospitalization expenses does {pn} cover?", "{v} days", "coverage_scope", "easy"),
	],
	"post_hospitalization_days": [
	("How many days of post-hospitalization expenses does {pn} cover?", "{v} days", "coverage_scope", "easy"),
	],
	"day_care_treatments_count": [
	("How many day-care treatments are covered under {pn}?", "{v} day-care treatments", "coverage_scope", "easy"),
	],
	"ayush_coverage": [
	("Does {pn} cover AYUSH (Ayurveda, Yoga, Unani, Siddha, Homeopathy)?", "{v_yes_no}", "coverage_scope", "easy"),
	],
	"no_claim_bonus_pct": [
	("What's the no-claim bonus on {pn}?", "{v}% step-up on sum insured per claim-free year", "bonus", "easy"),
	],
	"room_rent_capping": [
	("Is there a cap on room rent under {pn}?", "{v}", "sub_limit", "medium"),
	],
	"copayment_pct": [
	("Is there a copayment under {pn}?", "{v_copay}", "sub_limit", "easy"),
	],
	"network_hospital_count": [
	("How many hospitals are in the {pn} cashless network?", "Approximately {v:,} hospitals", "network", "easy"),
	],
	"max_renewal_age": [
	("Up to what age can {pn} be renewed?", "{v_renewal}", "eligibility", "easy"),
	],
	"min_entry_age": [
	("What is the minimum entry age for {pn}?", "{v} years", "eligibility", "easy"),
	],
	"max_entry_age": [
	("What is the maximum entry age for {pn}?", "{v} years", "eligibility", "easy"),
	],
	}


	# Adversarial questions appended for every policy — bot should refuse.
	REFUSAL_TEMPLATES = [
	("Does {pn} cover injuries from space tourism?", "expected_refusal", "exclusions_oos", "hard"),
	("What is the maximum claim amount for diamond-tipped surgical procedures under {pn}?", "expected_refusal", "exclusions_oos", "hard"),
	("What is the IRDAI mandate on dental coverage that {pn} must follow?", "expected_refusal", "regulatory_oos", "hard"),
	]


	def yes_no(v: Any) -> str:
	if v is True or v == "true" or v == 1:
	return "Yes"
	if v is False or v == "false" or v == 0:
	return "No"
	if isinstance(v, dict) and "covered" in v:
	return "Yes" if v["covered"] else "No"
	return str(v)


	def copay_str(v: Any) -> str:
	try:
	v = float(v)
	except Exception:
	return str(v)
	if v <= 0:
	return "No copayment"
	return f"{v:.0f}% copayment applies"


	def renewal_str(v: Any) -> str:
	try:
	n = int(v)
	if n >= 100:
	return "Lifelong renewability"
	return f"Up to age {n}"
	except Exception:
	return str(v)


	def format_answer(template: str, v: Any) -> str:
	out = template
	out = out.replace("{v_yes_no}", yes_no(v))
	out = out.replace("{v_copay}", copay_str(v))
	out = out.replace("{v_renewal}", renewal_str(v))
	if "{v:,}" in out:
	try:
	out = out.replace("{v:,}", f"{int(v):,}")
	except Exception:
	out = out.replace("{v:,}", str(v))
	out = out.replace("{v}", str(v))
	return out


	def load_policies() -> list[dict]:
	if not settings.STRUCTURED_DB.exists():
	return []
	con = duckdb.connect(str(settings.STRUCTURED_DB), read_only=True)
	rows = con.execute("SELECT policy_id, policy_name, data_json FROM policies").fetchall()
	con.close()
	out = []
	for pid, pname, data in rows:
	try:
	d = json.loads(data)
	d["_policy_id"] = pid
	d["_policy_name"] = pname or d.get("policy_name", pid)
	out.append(d)
	except Exception:
	continue
	return out


	def is_present(v: Any) -> bool:
	if v is None:
	return False
	if isinstance(v, str) and v.strip() == "":
	return False
	if isinstance(v, list) and len(v) == 0:
	return False
	if isinstance(v, dict):
	# Pydantic CoverageItem dict — present if covered is set
	return v.get("covered") is not None
	return True


	def extract_value_for_template(v: Any) -> Any:
	"""Some fields are CoverageItem dicts; pull a representative value."""
	if isinstance(v, dict) and "covered" in v:
	return v["covered"] # used by yes_no formatter
	return v


	def generate() -> list[dict]:
	policies = load_policies()
	gold: list[dict] = []

	for p in policies:
	pid = p["_policy_id"]
	pname = p["_policy_name"]

	for field, templates in QUESTION_TEMPLATES.items():
	raw_v = p.get(field)
	if not is_present(raw_v):
	continue
	v = extract_value_for_template(raw_v)

	for question_fmt, answer_fmt, qtype, difficulty in templates:
	answer = format_answer(answer_fmt, v)
	if not answer.strip():
	continue
	gold.append({
	"id": f"{pid}::{field}::{difficulty}",
	"policy_id": pid,
	"question": question_fmt.format(pn=pname),
	"expected_answer": answer,
	"question_type": qtype,
	"difficulty": difficulty,
	"expected_refusal": False,
	"language": "en",
	"generated_by": "pipeline_a",
	"source_field": field,
	})

	for question_fmt, marker, qtype, difficulty in REFUSAL_TEMPLATES:
	gold.append({
	"id": f"{pid}::REFUSE::{qtype}::{difficulty}",
	"policy_id": pid,
	"question": question_fmt.format(pn=pname),
	"expected_answer": "Bot should refuse or say not in document.",
	"question_type": qtype,
	"difficulty": difficulty,
	"expected_refusal": True,
	"language": "en",
	"generated_by": "pipeline_c_refusal",
	})

	return gold


	def main():
	gold = generate()
	OUTPUT.parent.mkdir(parents=True, exist_ok=True)
	OUTPUT.write_text(json.dumps(gold, indent=2))
	print(f"Wrote {len(gold)} gold Q&A pairs to {OUTPUT.relative_to(ROOT)}")
	# Quick breakdown
	by_policy: dict[str, int] = {}
	by_type: dict[str, int] = {}
	refusal_count = 0
	for g in gold:
	by_policy[g["policy_id"]] = by_policy.get(g["policy_id"], 0) + 1
	by_type[g["question_type"]] = by_type.get(g["question_type"], 0) + 1
	if g["expected_refusal"]:
	refusal_count += 1
	print(f"Policies covered: {len(by_policy)}")
	print(f"Refusal questions: {refusal_count}")
	print("By type:")
	for t, n in sorted(by_type.items(), key=lambda kv: -kv[1])[:15]:
	print(f" {t:>25s}: {n}")


	if __name__ == "__main__":
	main()