Spaces:
Sleeping
Sleeping
| """ | |
| claim_auditor.py | |
| ---------------- | |
| Groq-powered comparison engine. | |
| Compares extracted bill text against: | |
| 1. The MediAudit generalised policy baseline (from policy_engine.py) | |
| 2. Any user-uploaded insurer-specific policy PDF (optional) | |
| Returns a structured audit result containing: | |
| - Per line-item breakdown (parameter, bill detail, policy clause, status, lag reason) | |
| - Risk score (0–100) per finding | |
| - Overall insurance eligibility verdict | |
| - Recommended next steps for the patient | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import logging | |
| import re | |
| import textwrap | |
| from typing import Any | |
| from groq import Groq # type: ignore | |
| logger = logging.getLogger(__name__) | |
| # --------------------------------------------------------------------------- | |
| # Module-level client (initialised once via configure_groq) | |
| # --------------------------------------------------------------------------- | |
| _client: Groq | None = None | |
| # --------------------------------------------------------------------------- | |
| # Few-shot examples | |
| # --------------------------------------------------------------------------- | |
# Labelled examples embedded in every user message so the model sees the exact
# row schema and the three status glyphs (✅ admissible, ⚠️ partial, ❌ rejected).
# The glyphs, the rupee sign and the multiplication sign must stay real Unicode:
# if they are mis-decoded, ✅ and ❌ collapse into the same character and the
# status field becomes ambiguous.
# NOTE(review): example 1 is titled "sub-limit violation" but its expected row
# is a pass (₹8,000 is under the ₹10,000 cap) — confirm whether the scenario
# title or the row is the intended one.
FEW_SHOT_EXAMPLES: str = textwrap.dedent("""
    <examples>
      <example id="1">
        <scenario>Room rent sub-limit violation</scenario>
        <bill_snippet>Room Charges: ICU stay – 5 days × ₹8,000/day = ₹40,000</bill_snippet>
        <policy_snippet>ICU room rent capped at ₹10,000/day</policy_snippet>
        <expected_row>
        {
          "parameter": "ICU Room Rent",
          "bill_detail": "₹8,000/day × 5 days = ₹40,000",
          "policy_clause": "ICU sub-limit: ₹10,000/day",
          "status": "✅",
          "lag_reason": "Daily ICU rate is within the ₹10,000/day sub-limit. Fully admissible.",
          "risk_score": 5,
          "risk_label": "Low"
        }
        </expected_row>
      </example>
      <example id="2">
        <scenario>Waiting period not satisfied – knee replacement</scenario>
        <bill_snippet>Knee Replacement Surgery – Date: 14-Mar-2024. Policy Inception: 01-Feb-2024</bill_snippet>
        <policy_snippet>Joint replacement: 24-month waiting period</policy_snippet>
        <expected_row>
        {
          "parameter": "Knee Replacement Surgery",
          "bill_detail": "Procedure on 14-Mar-2024 (policy age ≈ 1.5 months)",
          "policy_clause": "Waiting Period – Joint replacement: 24 months",
          "status": "❌",
          "lag_reason": "Waiting period not met. Remaining: ≈22.5 months. Claim not admissible.",
          "risk_score": 95,
          "risk_label": "Critical"
        }
        </expected_row>
      </example>
      <example id="3">
        <scenario>Pre-existing diabetes complication</scenario>
        <bill_snippet>Diabetic Nephropathy treatment – ₹80,000</bill_snippet>
        <policy_snippet>PED covered after 2-year waiting period</policy_snippet>
        <expected_row>
        {
          "parameter": "Diabetic Nephropathy",
          "bill_detail": "₹80,000 – complication of pre-existing diabetes",
          "policy_clause": "Pre-Existing Disease (PED) – 2-year waiting period",
          "status": "⚠️",
          "lag_reason": "Diabetes is a known PED. Covered only if policy is >2 years old. 10% co-pay applies.",
          "risk_score": 60,
          "risk_label": "Medium"
        }
        </expected_row>
      </example>
      <example id="4">
        <scenario>Excluded treatment – cosmetic</scenario>
        <bill_snippet>Rhinoplasty (nose reshaping) – ₹1,20,000</bill_snippet>
        <policy_snippet>Cosmetic surgery explicitly excluded</policy_snippet>
        <expected_row>
        {
          "parameter": "Rhinoplasty",
          "bill_detail": "₹1,20,000 – elective cosmetic procedure",
          "policy_clause": "Exclusion – Cosmetic/aesthetic treatments",
          "status": "❌",
          "lag_reason": "Elective cosmetic surgery is a hard exclusion. Claim fully rejected.",
          "risk_score": 100,
          "risk_label": "Critical"
        }
        </expected_row>
      </example>
      <example id="5">
        <scenario>Co-pay – senior citizen non-network hospital</scenario>
        <bill_snippet>Patient age 63. Total bill ₹2,00,000. Non-network hospital.</bill_snippet>
        <policy_snippet>Senior citizen co-pay 20%; Non-network co-pay 20% (max combined 30%)</policy_snippet>
        <expected_row>
        {
          "parameter": "Senior + Non-network Co-pay",
          "bill_detail": "₹2,00,000 – age 63, non-network hospital",
          "policy_clause": "Co-pay – Senior citizen 20% + Non-network 20% (capped at 30%)",
          "status": "⚠️",
          "lag_reason": "Combined co-pay capped at 30%. Patient liability: ₹60,000. Admissible: ₹1,40,000.",
          "risk_score": 40,
          "risk_label": "Medium"
        }
        </expected_row>
      </example>
    </examples>
""")
| # --------------------------------------------------------------------------- | |
| # System prompt | |
| # --------------------------------------------------------------------------- | |
# System instruction sent with every audit request. It fixes the output
# contract (a JSON object with "rows" and "eligibility") that audit_claim
# parses, so the key names and the three status glyphs below must match the
# few-shot examples exactly. The glyphs and range dashes are real Unicode; a
# mis-decoded copy makes the ✅ / ❌ statuses indistinguishable.
SYSTEM_PROMPT: str = textwrap.dedent("""
    You are MediAudit, a senior insurance underwriting and claim validation expert
    with 20+ years of experience across Indian health insurance policies.
    You will receive:
    1. <policy_baseline> — A generalised insurance policy with standard rules.
    2. <uploaded_policy> — An insurer-specific policy (may be empty; use baseline if so).
    3. <bill> — Extracted medical bill text.
    4. <examples> — Few-shot labelled examples showing exact output format.
    Your tasks:
    A. For EACH line item in the bill, produce one JSON row with these exact keys:
       "parameter"     → treatment / charge name
       "bill_detail"   → amount + description from bill
       "policy_clause" → matching policy rule (baseline or uploaded)
       "status"        → exactly one of: ✅ ⚠️ ❌
       "lag_reason"    → 1–2 sentence explanation; quantify monetary gaps
       "risk_score"    → integer 0–100 (0=no risk, 100=critical rejection)
       "risk_label"    → one of: "Low" | "Medium" | "High" | "Critical"
    B. After the rows, produce an "eligibility" object:
       "verdict"    → one of: "Eligible" | "Partially Eligible" | "Not Eligible"
       "summary"    → 2–3 sentence plain-English explanation for the patient
       "next_steps" → list of 3–5 concrete action strings the patient should take
    C. Risk scoring guide:
       0–20   → Low (no issue or minor informational flag)
       21–50  → Medium (partial coverage, co-pay, sub-limit breach)
       51–80  → High (waiting period issue, PED concern)
       81–100 → Critical (hard exclusion, fraud risk, full rejection)
    D. Blend the uploaded policy with the baseline:
       - If uploaded policy has a stricter rule → use uploaded
       - If baseline has a stricter rule and uploaded is silent → use baseline
       - Use LLM reasoning for anything neither document covers explicitly
    Output ONLY a valid JSON object with two keys: "rows" and "eligibility".
    No markdown fences. No commentary outside the JSON.
""").strip()
| # --------------------------------------------------------------------------- | |
| # User message builder | |
| # --------------------------------------------------------------------------- | |
def _build_user_message(
    bill_text: str,
    policy_baseline_text: str,
    uploaded_policy_text: str,
) -> str:
    """Assemble the user-role prompt for one audit request.

    The message is the few-shot examples followed by the baseline policy, the
    uploaded policy and the bill, each wrapped in the XML-style tag the system
    prompt refers to, and a closing instruction.

    Args:
        bill_text: Text of the medical bill; inserted verbatim.
        policy_baseline_text: Text of the baseline policy; inserted verbatim.
        uploaded_policy_text: Text of the insurer-specific policy. When it is
            empty, whitespace-only or ``None``, a fixed placeholder sentence
            is inserted instead.

    Returns:
        The assembled message with leading and trailing whitespace removed.
    """
    # Fall back to a placeholder so the <uploaded_policy> tag is never empty.
    if uploaded_policy_text and uploaded_policy_text.strip():
        uploaded_section = uploaded_policy_text
    else:
        uploaded_section = "No insurer-specific policy uploaded. Use baseline only."

    # The sections are joined explicitly instead of dedenting an f-string:
    # dedent runs after interpolation, so the inserted multi-line documents
    # would decide the common indent and could either leave the template
    # indented or strip leading whitespace out of the caller's text.
    sections = [
        FEW_SHOT_EXAMPLES.strip(),
        "<policy_baseline>",
        policy_baseline_text,
        "</policy_baseline>",
        "<uploaded_policy>",
        uploaded_section,
        "</uploaded_policy>",
        "<bill>",
        bill_text,
        "</bill>",
        "Now perform the full audit and return the JSON object.",
    ]
    return "\n".join(sections).strip()
| # --------------------------------------------------------------------------- | |
| # Public API | |
| # --------------------------------------------------------------------------- | |
def configure_groq(api_key: str) -> None:
    """Create the module-wide Groq client.

    Meant to be called once at application startup; ``audit_claim`` raises
    until this has run.

    Args:
        api_key: Key passed to the ``Groq`` constructor.
    """
    global _client
    client = Groq(api_key=api_key)
    # Publish the client only after construction has succeeded.
    _client = client
    logger.info("Groq client initialised.")
def _normalise_audit_payload(parsed: Any) -> dict[str, Any]:
    """Coerce decoded model output into a dict with "rows" and "eligibility"."""
    fallback_eligibility: dict[str, Any] = {
        "verdict": "Unknown",
        "summary": "Could not determine eligibility.",
        "next_steps": ["Please review manually."],
    }

    # A bare top-level array is treated as the rows themselves.
    if isinstance(parsed, list):
        return {"rows": parsed, "eligibility": fallback_eligibility}
    if not isinstance(parsed, dict):
        raise ValueError(
            f"Expected a JSON object from the model, got {type(parsed).__name__}."
        )

    eligibility = parsed.get("eligibility")
    if not isinstance(eligibility, dict):
        eligibility = fallback_eligibility

    if isinstance(parsed.get("rows"), list):
        # Well-formed rows: keep the payload, but guarantee an eligibility dict.
        return {**parsed, "eligibility": eligibility}

    # "rows" is missing or malformed: adopt the first list value, if any.
    rows = next((v for v in parsed.values() if isinstance(v, list)), [])
    return {"rows": rows, "eligibility": eligibility}


def audit_claim(
    bill_text: str,
    policy_baseline_text: str,
    uploaded_policy_text: str = "",
    model_name: str = "llama-3.3-70b-versatile",
    temperature: float = 0.1,
) -> dict[str, Any]:
    """
    Audit a medical bill against the generalised policy baseline and any
    uploaded insurer-specific policy.

    Args:
        bill_text: Extracted text from the medical bill.
        policy_baseline_text: Compact text from policy_engine.get_policy_as_prompt_text().
        uploaded_policy_text: Extracted text from a user-uploaded policy PDF (optional).
        model_name: Groq model to use.
        temperature: Sampling temperature.

    Returns:
        Dict that always has these two keys:
            "rows"        → list of per-line-item dicts
            "eligibility" → dict with verdict, summary, next_steps
        When the model omits "eligibility" (or returns a non-object for it),
        a fallback with verdict "Unknown" is supplied.

    Raises:
        RuntimeError: If configure_groq() has not been called.
        json.JSONDecodeError: If the model reply is empty or not valid JSON.
        ValueError: If the reply decodes to something other than an object
            or array.
    """
    if _client is None:
        raise RuntimeError(
            "Groq client not initialised. Call configure_groq(api_key) first."
        )

    user_message = _build_user_message(
        bill_text, policy_baseline_text, uploaded_policy_text
    )
    logger.info("Sending audit request to Groq (%s)…", model_name)
    response = _client.chat.completions.create(
        model=model_name,
        temperature=temperature,
        response_format={"type": "json_object"},
        messages=[
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": user_message},
        ],
    )

    # The message content can be None; treat that as an empty reply so it
    # surfaces as a JSON decode error instead of an AttributeError.
    raw: str = (response.choices[0].message.content or "").strip()
    # Strip a surrounding markdown code fence in case the model adds one
    # despite the instructions.
    raw = re.sub(r"^```(?:json)?\s*", "", raw, flags=re.IGNORECASE)
    raw = re.sub(r"\s*```$", "", raw)

    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError:
        logger.exception("Groq reply was not valid JSON (%d characters).", len(raw))
        raise

    result = _normalise_audit_payload(parsed)
    logger.info(
        "Audit complete – %d rows | verdict: %s",
        len(result["rows"]),
        result["eligibility"].get("verdict", "?"),
    )
    return result