Spaces:
Sleeping
Sleeping
| """ | |
Gradio frontend for G.U.I.D.E.

Tabs:
  1. Chat             — Conversational interface; 🔒 badge + side-by-side redaction
                        reveal showing exactly what was stripped vs. what the AI saw.
  2. Verify Entities  — HITL panel: editable entity fields + "Confirm & Generate Draft".
  3. Complaint Draft  — Rendered final complaint with copy / .txt / PDF download.
  4. Escalation Guide — Recommended authorities with portal links.
  5. Privacy Audit    — Timestamped trail of every outbound/local event, with a
                        verified "0 raw identifiers transmitted" guarantee.
  6. About            — Architecture diagram, model cards, tech stack.

All API calls go to the FastAPI backend (src/api/main.py).
Session ID is created on app load via POST /api/session/create and persisted in
Gradio State for the lifetime of the browser session.
| """ | |
| from __future__ import annotations | |
| import html | |
| import json | |
| import os | |
| import re | |
| import tempfile | |
| import time | |
| from pathlib import Path | |
| import gradio as gr | |
| import requests | |
| # ββ Backend address ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| API_BASE = "http://localhost:8000" | |
| # CMA + Anthropic API can be slow. On rate-limited keys a single turn may burn | |
| # several minutes of 429 backoff (GUIDE_RETRY_MAX Γ exponential delay) across | |
| # tool rounds. The committed default (120s) suits a healthy key; local devs on a | |
| # rate-limited key can extend it via GUIDE_CHAT_TIMEOUT in .env (never committed). | |
| _TIMEOUT_CHAT = int(os.getenv("GUIDE_CHAT_TIMEOUT", "120")) | |
| _TIMEOUT_UPLOAD = 90 # OCR + ViT can take 10-30 s per page | |
| _TIMEOUT_SHORT = 15 # health / session create | |
| # Draft and escalation guide are two separate requests (split to stay under the | |
| # per-minute token cap). Optionally pause between them so the token bucket | |
| # refills before the escalation request fires. Default 0 (no wait); set | |
| # GUIDE_SPLIT_DELAY=30 in .env if the escalation request still 429s. | |
| _SPLIT_DELAY = float(os.getenv("GUIDE_SPLIT_DELAY", "0")) | |
| # ββ Privacy badge ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| _BADGE_HTML = ( | |
| '<div style="display:inline-flex;align-items:center;gap:9px;' | |
| "background:var(--g-good-bg);border:1px solid var(--g-good-bd);" | |
| "border-radius:999px;padding:7px 16px;font-size:0.83em;" | |
| "color:var(--g-good-fg);margin:6px 0 2px;" | |
| 'box-shadow:0 6px 16px -10px rgba(16,185,129,.5);">' | |
| "π <b>Privacy protected</b>" | |
| '<span style="opacity:.85;">β identifiers redacted locally ' | |
| "before reaching the AI</span>" | |
| "</div>" | |
| ) | |
| _BADGE_HIDDEN = "" | |
| # ββ Custom CSS βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| _CSS = """ | |
| @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&family=JetBrains+Mono:wght@400;500&display=swap'); | |
| :root { | |
| --g-accent:#4f46e5; --g-accent-2:#7c3aed; | |
| --g-grad:linear-gradient(135deg,#4f46e5 0%,#7c3aed 100%); | |
| --g-ink:#0f172a; --g-muted:#64748b; --g-line:#e7e9f2; | |
| --g-surface:#ffffff; /* hero / cards */ | |
| --g-surface-2:#ffffff; /* inner code/text boxes */ | |
| --g-panel:#fafafa; /* subtle panel background */ | |
| --g-chip-bg:rgba(79,70,229,.08); --g-chip-fg:#4338ca; --g-chip-bd:rgba(79,70,229,.18); | |
| --g-good-bg:#e8f5e9; --g-good-fg:#2e7d32; --g-good-bd:#c8e6c9; | |
| --g-bad-bg:#ffebee; --g-bad-fg:#c62828; | |
| --g-warn-bg:#fff3e0; --g-warn-bd:#ffb74d; | |
| --g-tabbar:rgba(255,255,255,.65); --g-hover:rgba(15,23,42,.04); | |
| } | |
| /* Dark palette β Gradio adds `.dark` to <body>; custom props cascade from there */ | |
| .dark { | |
| --g-ink:#e7ebf5; --g-muted:#94a0b8; --g-line:#27314c; | |
| --g-surface:#141c30; --g-surface-2:#0e1626; --g-panel:#121a2c; | |
| --g-chip-bg:rgba(124,58,237,.20); --g-chip-fg:#c4b5fd; --g-chip-bd:rgba(124,58,237,.38); | |
| --g-good-bg:rgba(16,185,129,.16); --g-good-fg:#6ee7b7; --g-good-bd:rgba(16,185,129,.35); | |
| --g-bad-bg:rgba(239,68,68,.18); --g-bad-fg:#fca5a5; | |
| --g-warn-bg:rgba(245,158,11,.15); --g-warn-bd:rgba(245,158,11,.5); | |
| --g-tabbar:rgba(255,255,255,.05); --g-hover:rgba(255,255,255,.07); | |
| } | |
| /* ββ App shell β centered, breathable, sans-serif βββββββββββββββββββββββββ */ | |
| .gradio-container { max-width:1120px !important; margin:0 auto !important; | |
| font-family:'Inter',ui-sans-serif,system-ui,sans-serif !important; } | |
| footer { display:none !important; } | |
| /* ββ Hero header ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ */ | |
| .guide-hero { | |
| position:relative; text-align:center; padding:32px 20px 24px; margin:6px 0 4px; | |
| border-radius:24px; overflow:hidden; border:1px solid var(--g-line); | |
| background: | |
| radial-gradient(120% 150% at 50% -30%, rgba(124,58,237,.17), transparent 60%), | |
| radial-gradient(90% 120% at 50% 0%, rgba(79,70,229,.13), transparent 55%), | |
| var(--g-surface); | |
| box-shadow:0 18px 40px -26px rgba(30,27,75,.55); | |
| } | |
| .guide-hero .g-badge { | |
| display:inline-flex; align-items:center; justify-content:center; | |
| width:56px; height:56px; border-radius:17px; margin-bottom:13px; | |
| background:var(--g-grad); color:#fff; font-size:27px; | |
| box-shadow:0 10px 22px -7px rgba(79,70,229,.65); | |
| } | |
| .guide-hero h1 { | |
| font-size:2.2rem; font-weight:800; letter-spacing:-.025em; line-height:1.1; margin:0 0 7px; | |
| background:var(--g-grad); -webkit-background-clip:text; background-clip:text; | |
| -webkit-text-fill-color:transparent; | |
| } | |
| .guide-hero .g-sub { color:var(--g-muted); font-size:.95rem; margin:0 auto 15px; max-width:560px; } | |
| .guide-hero .g-chips { display:flex; gap:8px; justify-content:center; flex-wrap:wrap; } | |
| .guide-hero .g-chip { | |
| font-size:.77rem; font-weight:600; color:#4338ca; background:rgba(79,70,229,.08); | |
| border:1px solid rgba(79,70,229,.18); padding:5px 13px; border-radius:999px; | |
| } | |
| /* ββ Tabs β segmented pills βββββββββββββββββββββββββββββββββββββββββββββββ */ | |
| .tab-nav { border:none !important; gap:6px !important; padding:6px !important; | |
| background:var(--g-tabbar); border:1px solid var(--g-line) !important; | |
| border-radius:15px !important; margin-bottom:16px; } | |
| .tab-nav button { border:none !important; background:transparent !important; | |
| color:var(--g-muted) !important; font-weight:600 !important; font-size:.91rem !important; | |
| padding:8px 15px !important; border-radius:10px !important; transition:all .18s ease; } | |
| .tab-nav button:hover { color:var(--g-ink) !important; background:var(--g-hover) !important; } | |
| .tab-nav button.selected { color:#fff !important; background:var(--g-grad) !important; | |
| box-shadow:0 6px 14px -6px rgba(79,70,229,.7); } | |
| /* ββ Theme toggle (sun/moon pill, fixed top-right) ββββββββββββββββββββββββ */ | |
| #theme-toggle { position:fixed !important; top:14px; right:16px; z-index:1000; | |
| min-width:0 !important; width:auto !important; flex:none !important; } | |
| #theme-toggle button { border-radius:999px !important; padding:7px 14px !important; | |
| font-weight:600 !important; font-size:.82rem !important; | |
| background:var(--g-surface) !important; border:1px solid var(--g-line) !important; | |
| color:var(--g-ink) !important; box-shadow:0 6px 16px -10px rgba(30,27,75,.5) !important; } | |
| #theme-toggle button:hover { transform:translateY(-1px); } | |
| /* ββ Cards, inputs, focus rings βββββββββββββββββββββββββββββββββββββββββββ */ | |
| .tab-pad { padding:8px 2px; } | |
| .badge-row { min-height:0; } | |
| textarea, input[type=text] { border-radius:12px !important; } | |
| textarea:focus, input[type=text]:focus { border-color:var(--g-accent) !important; | |
| box-shadow:0 0 0 3px rgba(79,70,229,.15) !important; } | |
| /* ββ Primary buttons get a subtle lift (gradient comes from the theme) βββββ */ | |
| button.primary:hover { transform:translateY(-1px); | |
| box-shadow:0 12px 24px -10px rgba(79,70,229,.85) !important; } | |
| /* ββ Progress: a prominent accent bar; hide Gradio's noisy ETA timer βββββββ */ | |
| /* The "2.0/9.7s" ETA is a queue estimate with no real basis, so we suppress | |
| the timer text and keep a single, clearly visible gradient progress bar. */ | |
| .meta-text, .meta-text-center, .progress-text { display:none !important; } | |
| .progress-bar-wrap, .wrap.progress-bar-wrap { | |
| background:rgba(79,70,229,.14) !important; border-radius:999px !important; | |
| height:6px !important; overflow:hidden !important; } | |
| .progress-bar, .eta-bar { | |
| background:var(--g-grad) !important; opacity:1 !important; | |
| height:6px !important; border-radius:999px !important; } | |
| /* ββ Inline busy spinner (HITL confirm + chained async steps) βββββββββββββ */ | |
| /* Pure-CSS ring that keeps spinning in the browser while the server works, so | |
| the user gets immediate feedback the moment they click. Colours come from the | |
| theme vars, so it adapts to light/dark automatically. */ | |
| .g-busy { display:inline-flex; align-items:center; gap:10px; | |
| font-weight:600; color:var(--g-muted); font-size:.92rem; padding:2px 0; } | |
| .g-spinner { width:16px; height:16px; flex:none; border-radius:50%; | |
| border:2.5px solid var(--g-chip-bd); border-top-color:var(--g-accent); | |
| animation:g-spin .7s linear infinite; } | |
| @keyframes g-spin { to { transform:rotate(360deg); } } | |
| /* ββ Chatbot: single scroll region (clamp wrapper, scroll inner) βββββββββββ */ | |
| .chat-box { max-height:58vh !important; overflow:hidden !important; border-radius:16px !important; } | |
| .chat-box > div { max-height:58vh !important; overflow-y:auto !important; } | |
| /* ββ Refined scrollbars βββββββββββββββββββββββββββββββββββββββββββββββββββ */ | |
| *::-webkit-scrollbar { width:10px; height:10px; } | |
| *::-webkit-scrollbar-thumb { background:#cbd2e1; border-radius:8px; | |
| border:2px solid transparent; background-clip:content-box; } | |
| *::-webkit-scrollbar-thumb:hover { background:#aab3c7; } | |
| """ | |
# A configured theme handles the structural look (palette, fonts, radii,
# gradient buttons) robustly via Gradio's CSS variables, so the CSS layer above
# only has to handle the bespoke hero/tabs/polish.
_THEME = gr.themes.Soft(
    primary_hue=gr.themes.colors.indigo,
    secondary_hue=gr.themes.colors.violet,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "ui-monospace", "monospace"],
    radius_size=gr.themes.sizes.radius_lg,
    spacing_size=gr.themes.sizes.spacing_md,
).set(
    body_background_fill="linear-gradient(180deg,#f7f8fc 0%,#eef1f9 100%)",
    body_background_fill_dark="linear-gradient(180deg,#0b1020 0%,#0f1730 100%)",
    block_background_fill="rgba(255,255,255,.86)",
    block_border_width="1px",
    block_shadow="0 10px 30px -22px rgba(30,27,75,.45)",
    button_primary_background_fill="linear-gradient(135deg,#4f46e5 0%,#7c3aed 100%)",
    button_primary_background_fill_hover="linear-gradient(135deg,#4338ca 0%,#6d28d9 100%)",
    button_primary_text_color="#ffffff",
    button_primary_shadow="0 8px 18px -8px rgba(79,70,229,.7)",
    button_large_radius="12px",
    button_small_radius="10px",
    input_background_fill="#ffffff",
    input_border_color="#e2e5ef",
)
| # Client-side dark/light toggle. Gradio drives its own palette off a `dark` class | |
| # on <body>; our custom CSS variables cascade from there too, so one class flip | |
| # restyles both Gradio's components and our bespoke hero/reveal/audit panels. | |
| # The choice is persisted in localStorage and restored on load. | |
| # Apply the `dark` class to every element Gradio might key off (html / body / | |
| # gradio-app) so the toggle is robust regardless of which one its theme reads. | |
| _THEME_APPLY_FN = """ | |
| function _guideApplyTheme(dark) { | |
| [document.documentElement, document.body, | |
| document.querySelector('gradio-app')].forEach(function (el) { | |
| if (el) el.classList.toggle('dark', dark); | |
| }); | |
| var b = document.querySelector('#theme-toggle button'); | |
| if (b) b.textContent = dark ? 'βοΈ Light' : 'π Dark'; | |
| } | |
| """ | |
| _THEME_TOGGLE_JS = """() => { | |
| %s | |
| const dark = !document.documentElement.classList.contains('dark'); | |
| _guideApplyTheme(dark); | |
| try { localStorage.setItem('guide-theme', dark ? 'dark' : 'light'); } catch (e) {} | |
| }""" % _THEME_APPLY_FN | |
| _THEME_LOAD_JS = """() => { | |
| %s | |
| // Light is the DEFAULT: go dark only if the user explicitly chose it before. | |
| let pref = 'light'; | |
| try { pref = localStorage.getItem('guide-theme') || 'light'; } catch (e) {} | |
| const dark = pref === 'dark'; | |
| // Re-assert a few times to override any system/Gradio dark applied late | |
| // during hydration, which would otherwise leave the page dark by default. | |
| _guideApplyTheme(dark); | |
| setTimeout(() => _guideApplyTheme(dark), 60); | |
| setTimeout(() => _guideApplyTheme(dark), 250); | |
| }""" % _THEME_APPLY_FN | |
# ── API helpers ──────────────────────────────────────────────────────────────
def _api_create_session() -> str:
    """Create a backend session and return its ``session_id``.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-2xx response (via ``raise_for_status``).
        KeyError: if the response JSON has no ``session_id`` field.
    """
    r = requests.post(f"{API_BASE}/api/session/create", timeout=_TIMEOUT_SHORT)
    r.raise_for_status()
    return r.json()["session_id"]
def _api_send_message(session_id: str, text: str) -> dict:
    """POST one user chat message to the session and return the decoded JSON reply.

    Uses the long chat timeout because a single turn can take minutes.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-2xx response.
    """
    r = requests.post(
        f"{API_BASE}/api/session/{session_id}/message",
        json={"text": text},
        timeout=_TIMEOUT_CHAT,
    )
    r.raise_for_status()
    return r.json()
def _api_upload(session_id: str, filepath: str) -> dict:
    """Upload the file at *filepath* to the session and return the decoded JSON reply.

    The file is sent as multipart field ``file`` under its base name, and the
    handle is closed as soon as the request completes.

    Raises:
        OSError: if the file cannot be opened.
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-2xx response.
    """
    filename = Path(filepath).name
    with open(filepath, "rb") as fh:
        r = requests.post(
            f"{API_BASE}/api/session/{session_id}/upload",
            files={"file": (filename, fh)},
            timeout=_TIMEOUT_UPLOAD,
        )
    r.raise_for_status()
    return r.json()
def _api_validate_entities(session_id: str, entities: dict) -> dict:
    """Submit the user-confirmed *entities* for the session; return the decoded JSON reply.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-2xx response.
    """
    r = requests.post(
        f"{API_BASE}/api/session/{session_id}/validate-entities",
        json={"entities": entities},
        timeout=_TIMEOUT_CHAT,
    )
    r.raise_for_status()
    return r.json()
def _api_escalation_guide(session_id: str) -> dict:
    """Request the escalation guide for the session; return the decoded JSON reply.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-2xx response.
    """
    r = requests.post(
        f"{API_BASE}/api/session/{session_id}/escalation-guide",
        timeout=_TIMEOUT_CHAT,
    )
    r.raise_for_status()
    return r.json()
def _api_audit(session_id: str) -> dict:
    """Fetch the privacy-audit payload for the session; return the decoded JSON.

    Raises:
        requests.exceptions.RequestException: on connection failure, timeout,
            or a non-2xx response.
    """
    r = requests.get(
        f"{API_BASE}/api/session/{session_id}/audit",
        timeout=_TIMEOUT_SHORT,
    )
    r.raise_for_status()
    return r.json()
| def _http_error_detail(exc: "requests.exceptions.HTTPError", fallback: str) -> str: | |
| """Pull the backend's user-friendly `detail` from an HTTPError, else fallback. | |
| The API maps LLM-provider hiccups (overloaded / rate-limited / timeout) to a | |
| calm `detail` string, so the UI can show it verbatim. | |
| """ | |
| try: | |
| detail = (exc.response.json() or {}).get("detail", "") | |
| except Exception: | |
| detail = "" | |
| if not detail: | |
| code = getattr(getattr(exc, "response", None), "status_code", None) | |
| if code == 503: | |
| detail = ("Claude is briefly overloaded. Please wait a few seconds " | |
| "and try again.") | |
| elif code == 429: | |
| detail = ("The AI is rate-limiting requests right now. Please wait a " | |
| "moment and try again.") | |
| return detail or fallback | |
| # ββ Display helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Presidio placeholders look like <PERSON>, <EMAIL_ADDRESS>, <PHONE_NUMBER>. | |
| # Gradio renders chatbot bubbles and gr.Markdown as HTML, so a bare <PERSON> | |
| # is parsed as an unknown HTML tag and silently dropped β the letter appears to | |
| # cut off at "Yours sincerely, <". Escape only the angle brackets of these | |
| # ENTITY-style tokens so they render literally, without disturbing real markdown. | |
| _PLACEHOLDER_RE = re.compile(r"<([A-Z][A-Z0-9_]*)>") | |
| _PII_LABEL_MAP: dict[str, str] = { | |
| "PERSON": "Your name", | |
| "PHONE_NUMBER": "Your phone number", | |
| "EMAIL_ADDRESS": "Your email address", | |
| "IN_AADHAAR": "Your Aadhaar number", | |
| "IN_PAN": "Your PAN number", | |
| "CREDIT_CARD": "Your credit card number", | |
| "IBAN_CODE": "Your IBAN", | |
| "US_BANK_NUMBER": "Your bank account number", | |
| "IN_VEHICLE_REGISTRATION": "Your vehicle registration number", | |
| } | |
def _detect_placeholders(text: str, limit: int = 6) -> list[str]:
    """Return unique placeholder token names found in *text*, in order of first appearance.

    Args:
        text: Text that may contain ``<ENTITY_TYPE>`` placeholders; may be empty.
        limit: Maximum number of distinct names to return (default 6, the
            original cap). A non-positive limit yields an empty list.

    Returns:
        Token names without the angle brackets, e.g. ``["PERSON", "IN_PAN"]``.
    """
    if not text or limit <= 0:
        return []
    seen: list[str] = []
    for m in _PLACEHOLDER_RE.finditer(text):
        token = m.group(1)
        if token in seen:
            continue
        seen.append(token)
        if len(seen) >= limit:
            break  # cap reached — stop scanning early
    return seen
def _escape_placeholders(text: str) -> str:
    """Make <ENTITY_TYPE> placeholders survive Gradio's HTML/markdown rendering.

    Only the matched placeholder tokens are HTML-escaped (their angle brackets
    become character entities); the rest of the text — including real markdown
    and any other markup — is returned untouched. Empty/None input is returned
    as-is.
    """
    if not text:
        return text
    # html.escape on the whole match turns the angle brackets into entities, so
    # the browser shows the token literally instead of dropping an unknown tag.
    return _PLACEHOLDER_RE.sub(lambda m: html.escape(m.group(0)), text)
| def _strip_markdown(text: str) -> str: | |
| """Remove inline Markdown so the complaint letter reads as clean plain text. | |
| Applied ONLY to the extracted draft (Complaint Draft tab + .txt download). | |
| The escalation guide keeps its Markdown β it renders in gr.Markdown β so this | |
| is deliberately not applied there. Doing it in code (not via the prompt) | |
| keeps the model's escalation formatting untouched. | |
| """ | |
| if not text: | |
| return text | |
| # **bold** / __bold__ β bold | |
| text = re.sub(r"\*\*(.+?)\*\*", r"\1", text) | |
| text = re.sub(r"__(.+?)__", r"\1", text) | |
| # *italic* / _italic_ β italic (avoid touching βΉ amounts or bare punctuation) | |
| text = re.sub(r"(?<!\w)\*(?!\s)(.+?)(?<!\s)\*(?!\w)", r"\1", text) | |
| text = re.sub(r"(?<!\w)_(?!\s)(.+?)(?<!\s)_(?!\w)", r"\1", text) | |
| # `code` β code | |
| text = re.sub(r"`(.+?)`", r"\1", text) | |
| # [text](url) β text | |
| text = re.sub(r"\[([^\]]+)\]\([^)]+\)", r"\1", text) | |
| # Stray/unbalanced emphasis markers left over (e.g. a trailing "**" on the | |
| # Subject line with no opening pair) β drop any remaining ** or __ runs. | |
| text = re.sub(r"\*\*|__", "", text) | |
| # leading "#" headings and "- " / "* " bullet markers β drop the marker only | |
| text = re.sub(r"^\s{0,3}#{1,6}\s+", "", text, flags=re.MULTILINE) | |
| text = re.sub(r"^(\s*)[*-]\s+", r"\1", text, flags=re.MULTILINE) | |
| return text | |
| # ββ Parse helpers ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def _parse_hitl_entities_json(reply: str) -> dict[str, str]: | |
| """Extract entity values from the <!--ENTITIES:{...}--> block the agent emits at the HITL gate.""" | |
| m = re.search(r"<!--ENTITIES:(\{.*?\})-->", reply, re.DOTALL) | |
| if not m: | |
| return {} | |
| try: | |
| return json.loads(m.group(1)) | |
| except (ValueError, KeyError): | |
| return {} | |
| # The escalation guide always opens with a recognizable header that the letter | |
| # itself never contains as a line-start (the letter only says "escalate ... to | |
| # TRAI" mid-sentence). We split the reply at the FIRST such header line: the | |
| # letter is everything before it, the escalation guide is everything from it on. | |
| # This is robust whether or not the model emits the optional --- fences, which it | |
| # does inconsistently β every prior fence-based heuristic broke when it didn't. | |
| _ESCALATION_BOUNDARY_RE = re.compile( | |
| r"""^[ \t#>*_]* # optional leading markdown/emoji-adjacent chars | |
| (?:[^\w\s]\s*)? # an optional leading emoji/symbol (π π― π β¦) | |
| (?: | |
| (?:Your\s+)?(?:Recommended\s+)?Escalation\s+(?:Path|Sequence|Guide) | |
| | Recommended\s+Escalation | |
| | (?:Your\s+)?Step[ \t]*1\b | |
| | Step[ \t]*1[ \t]*[:\-β] | |
| )""", | |
| re.IGNORECASE | re.MULTILINE | re.VERBOSE, | |
| ) | |
def _split_letter_and_escalation(reply: str) -> tuple[str, str]:
    """Split a combined reply into (letter, escalation_guide) at the first
    escalation-guide header. Falls back to --- fences, then to whole-reply.

    Any ``---`` fence lines are left in the returned segments; callers that need
    clean text remove them afterwards.

    Returns:
        ``(letter, escalation)``. ``escalation`` is ``""`` when no boundary or
        fenced letter is found; an empty/None reply yields ``("", "")``.
    """
    if not reply:
        return "", ""
    boundary = _ESCALATION_BOUNDARY_RE.search(reply)
    if boundary:
        cut = boundary.start()
        return reply[:cut], reply[cut:]
    # Fallback: legacy --- fence split (letter between first pair of fences).
    m = re.search(r"---\n(Subject:.*?)\n---(.*)", reply, re.DOTALL | re.IGNORECASE)
    if m:
        return m.group(1), m.group(2)
    return reply, ""
| # Valediction that closes the letter's signature block ("Yours sincerely," etc.). | |
| # The signature block (valediction Β· name Β· date Β· contact) is contiguous, so the | |
| # letter ends at the first blank line after the valediction. Anything past that β | |
| # attachment tips, "ask me to generate the escalation guide", other chatter β is | |
| # NOT part of the letter and must be kept out of the draft box / .txt / PDF. | |
| _VALEDICTION_RE = re.compile( | |
| r"^[ \t>*_]*(?:Yours\s+sincerely|Yours\s+faithfully|Yours\s+truly|" | |
| r"Sincerely|Faithfully|Regards|Warm\s+regards|Best\s+regards|Kind\s+regards)\b", | |
| re.IGNORECASE | re.MULTILINE, | |
| ) | |
def _extract_draft(reply: str) -> str:
    """Pull the complaint letter only: Subject: … through the signature block.

    Trailing conversational text the model sometimes appends after the signature
    (attachment tips, an invitation to generate the escalation guide, etc.) is
    dropped — those still appear in the chat reply, just not in the letter box.
    """
    letter, _ = _split_letter_and_escalation(reply)
    # Trim to the actual letter: Subject: to end of the letter segment, with any
    # surrounding --- fences removed.
    letter = re.sub(r"^\s*---\s*$", "", letter, flags=re.MULTILINE)
    m = re.search(r"(Subject:.*)", letter, re.DOTALL | re.IGNORECASE)
    letter = (m.group(1) if m else letter).strip()
    # Cut everything after the contiguous signature block: find the valediction,
    # then stop at the first blank line following it.
    val = _VALEDICTION_RE.search(letter)
    if val:
        rest = letter[val.end():]
        blank = re.search(r"\n[ \t]*\n", rest)
        end = val.end() + (blank.start() if blank else len(rest))
        letter = letter[:end].rstrip()
    return letter
def _extract_escalation(reply: str) -> str:
    """Pull the escalation guide (everything from the escalation header onward).

    Lines consisting only of a ``---`` fence are removed and the result is
    stripped; returns ``""`` when the reply contains no escalation section.
    """
    _, escalation = _split_letter_and_escalation(reply)
    escalation = re.sub(r"^\s*---\s*$", "", escalation, flags=re.MULTILINE)
    return escalation.strip()
| def _entities_to_rows(entities: list[dict]) -> list[list[str]]: | |
| """Convert entity dicts from the API to rows for gr.Dataframe.""" | |
| if not entities: | |
| return [["β", "β", "β"]] | |
| return [ | |
| [ | |
| e.get("text", ""), | |
| e.get("label", ""), | |
| f"{e.get('confidence', 0.0):.0%}", | |
| ] | |
| for e in entities | |
| ] | |
| def _normalise_prior(raw: str) -> str: | |
| """Map any truthy/falsy string to 'Yes' or 'No' for the Radio widget.""" | |
| return "Yes" if raw.strip().lower() in ("yes", "true", "1", "y") else "No" | |
| # ββ Redaction reveal (Feature 1) ββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Friendly entity-type labels shared by the reveal and the audit log. | |
| _PII_TYPE_FRIENDLY: dict[str, str] = { | |
| "PERSON": "Name", | |
| "PHONE_NUMBER": "Phone number", | |
| "EMAIL_ADDRESS": "Email address", | |
| "CREDIT_CARD": "Credit card", | |
| "IBAN_CODE": "IBAN", | |
| "US_BANK_NUMBER": "Bank account", | |
| "IN_AADHAAR": "Aadhaar number", | |
| "IN_PAN": "PAN number", | |
| "IN_VEHICLE_REGISTRATION": "Vehicle registration", | |
| } | |
def _friendly_type(entity_type: str) -> str:
    """Return a human-readable label for an entity type.

    Known types come from ``_PII_TYPE_FRIENDLY``; anything else is derived from
    the raw name by replacing underscores with spaces and title-casing it.
    A ``None`` type is treated as the empty string.
    """
    entity_type = entity_type or ""
    return _PII_TYPE_FRIENDLY.get(entity_type, entity_type.replace("_", " ").title())
| def _busy_html(label: str) -> str: | |
| """A small spinning ring + label for the HITL status line (renders in gr.HTML).""" | |
| return f'<div class="g-busy"><span class="g-spinner"></span>{label}</div>' | |
| def _highlight_original(original: str, redactions: list[dict]) -> str: | |
| """Re-render the user's original text with each PII span wrapped in a red chip.""" | |
| if not redactions: | |
| return html.escape(original) | |
| spans = sorted(redactions, key=lambda s: s.get("start", 0)) | |
| out, cursor = [], 0 | |
| for s in spans: | |
| start, end = s.get("start", 0), s.get("end", 0) | |
| if start < cursor or start > len(original): | |
| continue # overlapping/out-of-range span β skip defensively | |
| out.append(html.escape(original[cursor:start])) | |
| out.append( | |
| '<span style="background:var(--g-bad-bg);color:var(--g-bad-fg);border-radius:3px;' | |
| 'padding:0 3px;text-decoration:line-through;" ' | |
| f'title="{html.escape(_friendly_type(s.get("entity_type","")))}">' | |
| f'{html.escape(original[start:end])}</span>' | |
| ) | |
| cursor = end | |
| out.append(html.escape(original[cursor:])) | |
| return "".join(out) | |
| def _highlight_redacted(redacted: str) -> str: | |
| """Render redacted text with each <PLACEHOLDER> wrapped in a green chip. | |
| The text is HTML-escaped first (so <PERSON> becomes <PERSON>), then the | |
| escaped placeholder form is matched and wrapped. | |
| """ | |
| def _chip(m: re.Match) -> str: | |
| return ( | |
| '<span style="background:var(--g-good-bg);color:var(--g-good-fg);border-radius:3px;' | |
| f'padding:0 3px;font-weight:600;"><{m.group(1)}></span>' | |
| ) | |
| return re.sub(r"<([A-Z][A-Z0-9_]*)>", _chip, html.escape(redacted)) | |
| def _render_redaction_html(resp: dict) -> str: | |
| """Build the side-by-side 'what we protected' reveal from a /message response.""" | |
| redactions = resp.get("redactions") or [] | |
| original = resp.get("original_text") or "" | |
| redacted = resp.get("redacted_text") or "" | |
| if not redactions: | |
| return "" # nothing stripped β hide the panel | |
| chips = "".join( | |
| f'<span style="display:inline-block;background:var(--g-warn-bg);' | |
| f'border:1px solid var(--g-warn-bd);color:var(--g-ink);' | |
| f'border-radius:12px;padding:1px 9px;margin:2px;font-size:0.82em;">' | |
| f'{html.escape(_friendly_type(s.get("entity_type","")))}</span>' | |
| for s in redactions | |
| ) | |
| n = len(redactions) | |
| return f""" | |
| <div style="border:1px solid var(--g-good-bd);border-radius:12px;padding:12px 14px;margin:6px 0;background:var(--g-panel);color:var(--g-ink);"> | |
| <div style="font-weight:600;margin-bottom:8px;"> | |
| π {n} personal identifier{'s' if n != 1 else ''} removed locally β only the right-hand version was sent to the AI. | |
| </div> | |
| <div style="margin-bottom:10px;">{chips}</div> | |
| <div style="display:flex;gap:12px;flex-wrap:wrap;"> | |
| <div style="flex:1;min-width:240px;"> | |
| <div style="font-size:0.78em;color:var(--g-muted);margin-bottom:3px;text-transform:uppercase;letter-spacing:.04em;">What you typed (stayed on this server)</div> | |
| <div style="background:var(--g-surface-2);border:1px solid var(--g-line);border-radius:8px;padding:8px;font-size:0.9em;line-height:1.55;white-space:pre-wrap;">{_highlight_original(original, redactions)}</div> | |
| </div> | |
| <div style="flex:1;min-width:240px;"> | |
| <div style="font-size:0.78em;color:var(--g-muted);margin-bottom:3px;text-transform:uppercase;letter-spacing:.04em;">What the AI received</div> | |
| <div style="background:var(--g-surface-2);border:1px solid var(--g-line);border-radius:8px;padding:8px;font-size:0.9em;line-height:1.55;white-space:pre-wrap;">{_highlight_redacted(redacted)}</div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| # ββ Privacy audit trail (Feature 2) βββββββββββββββββββββββββββββββββββββββββββ | |
| _AUDIT_INTRO = ( | |
| "## π Privacy Audit Trail\n\n" | |
| "> **Our guarantee:** your personal data never leaves this server unprotected. " | |
| "Every message is redacted locally *before* a single character reaches the AI. " | |
| "Documents are read on-device β your images are never sent to any third party.\n\n" | |
| "Below is a complete, timestamped record of every event where your data was " | |
| "transmitted or processed. This is the *exact* text that left this server." | |
| ) | |
| def _render_audit_html(entries: list[dict]) -> str: | |
| """Render the audit entries as an HTML log beneath the guarantee banner.""" | |
| if not entries: | |
| return ( | |
| '<div style="color:var(--g-muted);padding:10px 0;">No activity yet β ' | |
| "start a conversation in the Chat tab and your privacy audit will appear here.</div>" | |
| ) | |
| total_stripped = sum(e.get("pii_count", 0) for e in entries) | |
| any_leak = any(e.get("leak_check") == "failed" for e in entries) | |
| banner_bg, banner_fg, banner_txt = ( | |
| ("var(--g-bad-bg)", "var(--g-bad-fg)", | |
| "β οΈ A potential leak was detected β see flagged entries below.") | |
| if any_leak | |
| else ("var(--g-good-bg)", "var(--g-good-fg)", | |
| f"β Verified: {total_stripped} personal identifier(s) stripped Β· " | |
| "0 raw identifiers transmitted across this session.") | |
| ) | |
| rows = [ | |
| f'<div style="background:{banner_bg};color:{banner_fg};border-radius:8px;' | |
| f'padding:9px 12px;margin-bottom:12px;font-weight:600;">{banner_txt}</div>' | |
| ] | |
| for e in entries: | |
| ts = html.escape(e.get("timestamp", "")[:19].replace("T", " ")) | |
| is_doc = e.get("event") == "document_local" | |
| icon = "π" if is_doc else "π€" | |
| check = e.get("leak_check", "n/a") | |
| if check == "passed": | |
| badge = '<span style="color:var(--g-good-fg);font-weight:600;">β 0 raw identifiers transmitted</span>' | |
| elif check == "failed": | |
| badge = '<span style="color:var(--g-bad-fg);font-weight:600;">β οΈ leak detected</span>' | |
| else: | |
| badge = '<span style="color:var(--g-muted);">π₯οΈ processed locally β nothing transmitted</span>' | |
| types = ", ".join(_friendly_type(t) for t in e.get("pii_types_found", [])) | |
| types_line = ( | |
| f'<div style="font-size:0.82em;color:var(--g-muted);margin-top:3px;">Stripped: {html.escape(types)}</div>' | |
| if types else "" | |
| ) | |
| transmitted = e.get("transmitted_text", "") | |
| transmitted_line = ( | |
| '<div style="font-size:0.82em;color:var(--g-muted);margin-top:6px;">Transmitted to AI:</div>' | |
| f'<div style="background:var(--g-surface-2);border:1px solid var(--g-line);border-radius:7px;padding:7px;' | |
| f'font-family:monospace;font-size:0.82em;white-space:pre-wrap;margin-top:2px;">' | |
| f'{_highlight_redacted(transmitted)}</div>' | |
| if transmitted else "" | |
| ) | |
| rows.append( | |
| f'<div style="border:1px solid var(--g-line);border-left:3px solid ' | |
| f'{"var(--g-bad-fg)" if check == "failed" else "#43a047"};border-radius:8px;' | |
| f'padding:10px 12px;margin-bottom:8px;background:var(--g-panel);color:var(--g-ink);">' | |
| f'<div style="display:flex;justify-content:space-between;gap:10px;flex-wrap:wrap;">' | |
| f'<span style="font-weight:600;">{icon} {html.escape(e.get("description",""))}</span>' | |
| f'<span style="font-size:0.8em;color:var(--g-muted);white-space:nowrap;">{ts} UTC</span>' | |
| f'</div>{types_line}' | |
| f'<div style="margin-top:4px;">{badge}</div>' | |
| f'{transmitted_line}</div>' | |
| ) | |
| return "".join(rows) | |
def _refresh_audit(sid: str) -> str:
    """Fetch the session's audit entries and return them rendered as HTML.

    Best-effort by design: with no session id, or when the backend call fails
    for any reason, the empty-state log is returned rather than an error.
    """
    if not sid:
        return _render_audit_html([])
    try:
        data = _api_audit(sid)
    except Exception:
        # Deliberately broad: refreshing the audit tab must never raise into the UI.
        return _render_audit_html([])
    return _render_audit_html(data.get("entries", []))
| # ββ PDF export (Feature 3) ββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _build_pdf(draft_text: str, out_path: str) -> str:
    """Render *draft_text* as an A4 complaint-letter PDF written to *out_path*.

    Built with reportlab Platypus. Chunks separated by a blank line become
    individual paragraphs, and single newlines inside a chunk are turned into
    ``<br/>`` line breaks so address blocks keep their shape. A small footer
    paragraph is appended after the letter body.

    Args:
        draft_text: The letter text to typeset.
        out_path: Destination path of the PDF file.

    Returns:
        *out_path*, unchanged.

    Raises:
        ImportError: If reportlab is not installed (imports are function-local).
    """
    from xml.sax.saxutils import escape as _xml_escape

    from reportlab.lib.pagesizes import A4
    from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
    from reportlab.lib.units import mm
    from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer

    document = SimpleDocTemplate(
        out_path,
        pagesize=A4,
        topMargin=24 * mm,
        bottomMargin=20 * mm,
        leftMargin=22 * mm,
        rightMargin=22 * mm,
        title="Consumer Complaint Letter",
    )

    normal = getSampleStyleSheet()["Normal"]
    letter_style = ParagraphStyle(
        "Body",
        parent=normal,
        fontName="Times-Roman",
        fontSize=11.5,
        leading=16,
        spaceAfter=10,
    )
    footer_style = ParagraphStyle(
        "Footer",
        parent=normal,
        fontName="Helvetica-Oblique",
        fontSize=8,
        textColor="#999999",
        spaceBefore=14,
    )

    story = []
    paragraphs = [
        piece
        for piece in re.split(r"\n\s*\n", draft_text.strip())
        if piece.strip()
    ]
    for piece in paragraphs:
        # Escape XML-special characters first, then mark intra-chunk newlines
        # as explicit line breaks in the paragraph markup.
        markup = _xml_escape(piece).replace("\n", "<br/>")
        story.extend((Paragraph(markup, letter_style), Spacer(1, 2)))

    story.append(Paragraph(
        "Generated by G.U.I.D.E. β Grievance Utility for Information Extraction, "
        "Drafting and Enrichment.", footer_style,
    ))

    document.build(story)
    return out_path
| # ββ Gradio application βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| def build_app() -> gr.Blocks: | |
| """Construct and return the Gradio Blocks application.""" | |
| with gr.Blocks( | |
| title="G.U.I.D.E. β Consumer Complaint Assistant", | |
| theme=_THEME, | |
| css=_CSS, | |
| js=_THEME_LOAD_JS, | |
| ) as demo: | |
| # ββ Session state (one per browser tab) βββββββββββββββββββββββββββββββ | |
| session_id = gr.State("") | |
| pii_tokens = gr.State([]) | |
| # Holds the just-submitted chat payload between the instant-clear stage | |
| # and the (slow) API-response stage, so the textbox empties immediately. | |
| pending_payload = gr.State(None) | |
| # ββ Dark / light toggle (fixed top-right via #theme-toggle CSS) βββββββ | |
| theme_toggle = gr.Button("π Dark", elem_id="theme-toggle", scale=0) | |
| theme_toggle.click(fn=None, inputs=None, outputs=None, js=_THEME_TOGGLE_JS) | |
| # ββ Hero header βββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| gr.HTML(""" | |
| <div class="guide-hero"> | |
| <div class="g-badge">βοΈ</div> | |
| <h1>G.U.I.D.E.</h1> | |
| <p class="g-sub"> | |
| Grievance Utility for Information Extraction, Drafting & Enrichment β | |
| turn a plain-language story into a ready-to-file complaint. | |
| </p> | |
| <div class="g-chips"> | |
| <span class="g-chip">π Privacy-first</span> | |
| <span class="g-chip">π§ Powered by Claude</span> | |
| <span class="g-chip">ποΈ Indian regulators</span> | |
| </div> | |
| </div> | |
| """) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Tabs | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Tabs(): | |
| # ββ Tab 1 β How To Guide βββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("π How To Guide"): | |
| with gr.Column(elem_classes="tab-pad"): | |
| gr.Markdown(""" | |
| # Welcome to G.U.I.D.E. | |
| **G.U.I.D.E.** helps you write a formal complaint letter and find the right authority to escalate to β for free, in minutes. Just describe your problem in plain language and the app does the rest. | |
| --- | |
| ## Your Step-by-Step Flow | |
| **Step 1 β Step 2 β Step 3 β Step 4** | |
| | Step | Tab | What you do | | |
| |------|-----|-------------| | |
| | 1οΈβ£ | π¬ **Chat** | Describe your complaint and optionally attach a receipt, bill, or screenshot | | |
| | 2οΈβ£ | β **Verify Entities** | Review and confirm the key details extracted | | |
| | 3οΈβ£ | π **Complaint Draft** | Read, copy, or download your ready-to-send letter | | |
| | 4οΈβ£ | ποΈ **Escalation Guide** | Find the right regulatory authority with portal links | | |
| --- | |
| ## What Each Tab Does | |
| | Tab | What it does | | |
| |-----|-------------| | |
| | π¬ **Chat** | Describe your complaint in plain language and optionally attach a receipt, bill, or screenshot. G.U.I.D.E. asks follow-up questions, reads any attached document automatically, and collects all the details needed for your letter. The π **βSee what we protectedβ** panel shows, side by side, exactly which personal details were removed before anything reached the AI. | | |
| | β **Verify Entities** | Review and correct the key details G.U.I.D.E. extracted from your complaint. Once you click **Confirm & Generate Draft**, your letter is created. | | |
| | π **Complaint Draft** | Your ready-to-send formal complaint letter. Copy it to your clipboard or download it as a **.txt** or a formatted **PDF**. | | |
| | ποΈ **Escalation Guide** | Tells you exactly which authority to contact β NCH, TRAI, RBI Ombudsman, IRDAI β with direct links to their complaint portals. | | |
| | π **Privacy Audit** | A timestamped record of every time your data was transmitted or processed, with a verified *β0 raw identifiers transmittedβ* guarantee β so you can confirm your private details never left this server unprotected. | | |
| --- | |
| ## π Your Privacy | |
| > **Your details stay private.** Before your complaint is processed, personal information β your name, phone number, Aadhaar number, PAN, and bank account details β is automatically removed on this server. The AI only ever sees placeholders like `<PERSON>` or `<PHONE_NUMBER>`, never your real data. The π badge in the Chat tab confirms when this has happened, the **βSee what we protectedβ** panel shows you precisely what was removed, and the **π Privacy Audit** tab keeps a full transmission log you can inspect at any time. | |
| --- | |
| ## What Complaints Can G.U.I.D.E. Help With? | |
| - π **E-Commerce** β late delivery, wrong item, refund not received *(Flipkart, Amazon, Meesho, etc.)* | |
| - π‘ **Telecom** β poor network, wrong billing, SIM issues *(Jio, Airtel, Vi, BSNL, etc.)* | |
| - π¦ **Banking** β unauthorised transactions, loan disputes, account issues | |
| - π **Credit Score / CIBIL** β incorrect credit report, loan rejection due to wrong score | |
| - π‘οΈ **Insurance** β claim rejected, policy mis-sold, premium dispute | |
| - π **General** β any other consumer complaint | |
| --- | |
| *Ready to start? Click the **π¬ Chat** tab and describe your complaint.* | |
| """) | |
| # ββ Tab 2 β Chat βββββββββββββββββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("π¬ Chat"): | |
| with gr.Column(elem_classes="tab-pad"): | |
| chatbot = gr.Chatbot( | |
| label="G.U.I.D.E. conversation", | |
| # Viewport-relative height so the chatbot is the ONLY | |
| # scroll region. A fixed pixel height (e.g. 440) plus the | |
| # badge/input rows overflows the viewport, producing a | |
| # second (page) scrollbar on top of the chatbot's own. | |
| height="60vh", | |
| elem_classes="chat-box", | |
| placeholder=( | |
| "<b>Welcome to G.U.I.D.E.</b><br>" | |
| "Describe your complaint and I will help you draft a formal letter " | |
| "and identify the right regulatory authority to escalate to." | |
| ), | |
| ) | |
| privacy_badge = gr.HTML( | |
| value=_BADGE_HIDDEN, | |
| elem_classes="badge-row", | |
| ) | |
| with gr.Accordion( | |
| "π See what we protected", open=False, visible=False | |
| ) as redaction_accordion: | |
| redaction_reveal = gr.HTML(value="") | |
| chat_input = gr.MultimodalTextbox( | |
| placeholder="Describe your complaint⦠(optionally attach a receipt, bill, or screenshot)", | |
| file_types=[".pdf", ".png", ".jpg", ".jpeg", ".webp"], | |
| file_count="single", | |
| sources=["upload"], | |
| submit_btn="Send βΆ", | |
| show_label=False, | |
| # lines=1 so a single-line message submits on Enter | |
| # (Gradio only sends on plain Enter when content is 1 line; | |
| # it still grows to max_lines, and Shift+Enter adds a newline). | |
| lines=1, | |
| max_lines=6, | |
| autofocus=True, | |
| ) | |
# ── Tab 3 — Verify Entities (HITL) ───────────────────────────────
| with gr.Tab("β Verify Entities"): | |
| with gr.Column(elem_classes="tab-pad"): | |
| hitl_notice = gr.Markdown( | |
| value=( | |
| "> **Waiting for conversation** β Complete the chat until " | |
| "G.U.I.D.E. asks you to confirm your details here, then " | |
| "review, edit if needed, and click **Confirm & Generate Draft**." | |
| ) | |
| ) | |
| with gr.Group(): | |
| entity_provider = gr.Textbox( | |
| label="1. Company / Provider", | |
| placeholder="e.g. HDFC Bank, Flipkart, Airtel", | |
| ) | |
| with gr.Row(): | |
| entity_date = gr.Textbox( | |
| label="2. Date of Incident", | |
| placeholder="e.g. 12 March 2024", | |
| ) | |
| entity_amount = gr.Textbox( | |
| label="3. Amount Involved", | |
| placeholder="e.g. βΉ4,299 or N/A", | |
| ) | |
| with gr.Row(): | |
| entity_ref_id = gr.Textbox( | |
| label="4. Reference / Order ID", | |
| placeholder="e.g. OD-2930291 or N/A", | |
| ) | |
| entity_prior_contact = gr.Radio( | |
| label="5. Have you already contacted the company?", | |
| choices=["Yes", "No"], | |
| value="No", | |
| ) | |
| entity_resolution = gr.Textbox( | |
| label="6. Desired Resolution", | |
| placeholder="e.g. Full refund of βΉ4,299 within 7 working days", | |
| lines=2, | |
| ) | |
| confirm_btn = gr.Button( | |
| "β Confirm & Generate Draft", | |
| variant="primary", | |
| size="lg", | |
| ) | |
| # gr.HTML (not Markdown) so the inline busy spinner renders. | |
| hitl_status = gr.HTML(visible=False) | |
# ── Tab 4 — Complaint Draft ───────────────────────────────────────
| with gr.Tab("π Complaint Draft"): | |
| with gr.Column(elem_classes="tab-pad"): | |
| gr.Markdown( | |
| "Your formal complaint letter will appear here after you confirm " | |
| "your details in the **Verify Entities** tab. " | |
| "Use the copy button or download as a plain-text file." | |
| ) | |
| with gr.Column(visible=False) as pii_fill_section: | |
| gr.Markdown( | |
| "## Fill in your personal details\n" | |
| "The placeholders below were removed for privacy. " | |
| "Enter your real details and click **βοΈ Apply & Preview** " | |
| "to insert them into the letter before downloading." | |
| ) | |
| pii_input_1 = gr.Textbox(label="", visible=False, interactive=True) | |
| pii_input_2 = gr.Textbox(label="", visible=False, interactive=True) | |
| pii_input_3 = gr.Textbox(label="", visible=False, interactive=True) | |
| pii_input_4 = gr.Textbox(label="", visible=False, interactive=True) | |
| pii_input_5 = gr.Textbox(label="", visible=False, interactive=True) | |
| pii_input_6 = gr.Textbox(label="", visible=False, interactive=True) | |
| apply_pii_btn = gr.Button("βοΈ Apply & Preview", variant="secondary") | |
| draft_output = gr.Textbox( | |
| label="Complaint Letter", | |
| lines=26, | |
| interactive=True, | |
| placeholder="(Draft will appear here after you click Confirm & Generate Draft)", | |
| ) | |
| pii_warn_banner = gr.Markdown(visible=False) | |
| with gr.Row(): | |
| download_btn = gr.Button("πΎ Download as .txt") | |
| download_pdf_btn = gr.Button("π Download as PDF", variant="primary") | |
| with gr.Row(): | |
| draft_file = gr.File( | |
| label="Click to download (.txt)", | |
| visible=False, | |
| interactive=False, | |
| ) | |
| draft_file_pdf = gr.File( | |
| label="Click to download (.pdf)", | |
| visible=False, | |
| interactive=False, | |
| ) | |
| # ββ Tab 5 β Escalation Guide βββββββββββββββββββββββββββββββββββββ | |
| with gr.Tab("ποΈ Escalation Guide"): | |
| with gr.Column(elem_classes="tab-pad"): | |
| gr.Markdown( | |
| "After your complaint draft is generated, G.U.I.D.E. recommends " | |
| "the appropriate Indian regulatory authorities to contact, " | |
| "in the order you should approach them." | |
| ) | |
| escalation_md = gr.Markdown( | |
| value=( | |
| "> **Pending** β Escalation recommendations will appear here " | |
| "after the complaint draft is generated." | |
| ) | |
| ) | |
# ── Tab 6 — Privacy Audit ─────────────────────────────────────────
| with gr.Tab("π Privacy Audit"): | |
| with gr.Column(elem_classes="tab-pad"): | |
| gr.Markdown(_AUDIT_INTRO) | |
| refresh_audit_btn = gr.Button("π Refresh audit log", size="sm") | |
| audit_log = gr.HTML(value=_render_audit_html([])) | |
# ── Tab 7 — About ─────────────────────────────────────────────────
| with gr.Tab("βΉοΈ About"): | |
| gr.Markdown(""" | |
| ## G.U.I.D.E. β Technical Overview | |
| **G.U.I.D.E.** (Grievance Utility for Information Extraction, Drafting and Enrichment) | |
| is a privacy-first consumer complaint assistant for Indian consumers built as part of | |
| the IISc Deep Learning course project. | |
| ### Four-Layer Pipeline | |
| | Layer | Technology | Purpose | | |
| |-------|-----------|---------| | |
| | **Privacy** | Microsoft Presidio + spaCy `en_core_web_lg` | Local PII redaction before any API call | | |
| | **Deep Learning** | DistilBERT + ViT (HuggingFace Transformers) | Domain classification, NER, document analysis | | |
| | **Agent** | Claude `claude-opus-4-8` (Anthropic) | Orchestration, drafting, escalation routing | | |
| | **Frontend** | Gradio 4.x | This interface | | |
| ### Privacy Guarantee | |
| > π Your name, phone number, Aadhaar UID, PAN number, and account numbers are | |
| > **redacted locally** using Microsoft Presidio *before* any text is sent to Claude. | |
| > The AI only ever sees `<PERSON>`, `<PHONE_NUMBER>`, `<IN_AADHAAR>` β never your | |
| > actual data. The π badge in the Chat tab confirms when redaction has taken place. | |
| > | |
| > The **Verify Entities** tab (the HITL step) lets you supply correct readable labels | |
| > β e.g., "HDFC Bank" instead of `<ORG>` β that appear in the final draft, without | |
| > ever exposing the original PII to the model. | |
| ### Deep Learning Models | |
| | Model | Architecture | Training Data | | |
| |-------|-------------|---------------| | |
| | **DomainClassifier** | DistilBERT + linear head, 6 classes | CFPB Consumer Complaint Database | | |
| | **EvidenceNER** | DistilBERT token classifier, BIO tags | ~4 000 synthetic complaint sentences | | |
| | **DocumentViT** | `google/vit-base-patch16-224` | Patch-variance scoring on document images | | |
| | **NextActionPredictor** | MLP 12β64β64β6 | ~6 000 synthetic (domain, entity, prior) samples | | |
| ### Supported Complaint Domains & Regulators | |
| | Domain | Primary Escalation Authority | | |
| |--------|------------------------------| | |
| | E-Commerce | National Consumer Helpline (NCH) Β· `consumerhelpline.gov.in` | | |
| | Telecom | TRAI β Telecom Regulatory Authority of India Β· `trai.gov.in` | | |
| | Banking | RBI Integrated Ombudsman Scheme Β· `cms.rbi.org.in` | | |
| | Credit Score / CIBIL | RBI Ombudsman Β· `cms.rbi.org.in` | | |
| | Insurance | IRDAI Bima Bharosa Β· `bimabharosa.irdai.gov.in` | | |
| | General | Consumer Disputes Redressal Commission Β· `edaakhil.nic.in` | | |
| ### Tech Stack | |
| `fastapi` Β· `uvicorn` Β· `gradio` Β· `anthropic` Β· `presidio-analyzer` Β· | |
| `transformers` Β· `torch` Β· `pytesseract` Β· `pdfplumber` Β· `spacy` | |
| """) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Event handlers | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # ββ App load: initialise session ββββββββββββββββββββββββββββββββββββββ | |
def _on_load():
    """Create a backend session when the app loads and return its id.

    Returns an empty string if session creation raises; the chat handler
    creates the session lazily on first use in that case.
    """
    try:
        new_sid = _api_create_session()
    except Exception:
        return ""  # handlers will surface the error on first use
    return new_sid
| demo.load(fn=_on_load, inputs=[], outputs=[session_id]) | |
| # ββ Shared "no change" sentinel helpers βββββββββββββββββββββββββββββββ | |
def _nu():
    """Return an argument-less ``gr.update()``, used as a "leave unchanged" output."""
    unchanged = gr.update()
    return unchanged
| # ββ Chat submit β two stages so the textbox clears INSTANTLY ββββββββββ | |
| # Stage 1 (_echo_user): fast, no network β clear the input, echo the | |
| # user's bubble, stash the payload in State. | |
| # Stage 2 (_respond): the slow API work, then render the assistant reply | |
| # and side panels. Splitting them means the box empties on submit | |
| # instead of only when the response returns. | |
def _echo_user(payload: dict, history: list):
    """Stage 1 of a chat submit: echo the user's bubble without any network call.

    Args:
        payload: The MultimodalTextbox value (``text`` and ``files`` keys); may
            be falsy.
        history: Current chat history as a list of message dicts.

    Returns:
        A 3-tuple ``(textbox_value, history, pending_payload)``. When something
        was submitted the textbox value is ``None`` (which clears it), the
        history gains a user message and the payload is carried forward. When
        nothing was submitted the textbox and history are left alone and the
        pending payload is ``None``.
    """
    submitted = payload or {}
    text = (submitted.get("text") or "").strip()
    attachments = submitted.get("files") or []

    if text or attachments:
        # A file-only submission still gets a visible user bubble.
        bubble = {"role": "user", "content": text or "π [document attached]"}
        return None, history + [bubble], submitted

    # Nothing submitted: leave the box untouched, nothing pending.
    return gr.update(), history, None
| # Stage 2 outputs (12): | |
| # session_id | chatbot | privacy_badge | hitl_notice | |
| # entity_provider | entity_date | entity_amount | |
| # entity_ref_id | entity_prior_contact | entity_resolution | |
| # redaction_accordion | redaction_reveal | |
def _respond(payload: dict, history: list, sid: str):
    """Stage 2 of a chat submit: call the backend and render the assistant turn.

    Args:
        payload: The submission stashed by ``_echo_user`` (``text`` / ``files``
            keys), or a falsy value when nothing was submitted.
        history: Chat history; the user's bubble is already in it.
        sid: Current session id. Created lazily here when empty.

    Returns:
        A 12-tuple in ``_respond_outputs`` order: session id, chat history,
        privacy badge HTML, HITL notice, the six entity fields, the redaction
        accordion update and the redaction reveal HTML. On any failure the
        error text is appended to the chat as an assistant message and the
        side panels are left unchanged.
    """
    _hide_reveal = gr.update(visible=False)

    def _noop():
        return (sid, history, _BADGE_HIDDEN, _nu(),
                _nu(), _nu(), _nu(), _nu(), _nu(), _nu(),
                _hide_reveal, _nu())

    if not payload:
        return _noop()

    message = (payload.get("text") or "").strip()
    files = payload.get("files") or []

    def _err(txt):
        # The user bubble is already in history (added by _echo_user).
        # `sid` is read at call time, so a lazily created session is returned.
        return (
            sid,
            history + [{"role": "assistant", "content": txt}],
            _BADGE_HIDDEN, _nu(), _nu(), _nu(), _nu(), _nu(), _nu(), _nu(),
            _hide_reveal, _nu(),
        )

    # Lazy session creation — recovers from API-not-ready-at-load-time race.
    if not sid:
        try:
            sid = _api_create_session()
        except Exception:
            return _err(
                "β οΈ No active session β the backend may be offline. "
                "Reload the page or check that the API server is running on port 8000."
            )

    # --- inline document upload (if file attached) ---
    history_after_upload = history
    if files:
        # An attachment may arrive as a plain path or as a dict with "path".
        filepath = files[0] if isinstance(files[0], str) else files[0].get("path", "")
        try:
            _api_upload(sid, filepath)
        except requests.exceptions.ConnectionError:
            return _err("β οΈ Could not reach the backend to process the document.")
        except requests.exceptions.HTTPError as exc:
            detail = ""
            try:
                detail = exc.response.json().get("detail", "")
            except Exception:
                # No usable JSON body: fall back to the exception text below.
                pass
            return _err(
                f"β οΈ Could not process the document: {detail or exc}. "
                "Please try again or describe the details manually."
            )
        except Exception as exc:
            return _err(
                f"β οΈ Could not process the document: {exc}. "
                "Please try again or describe the details manually."
            )
        # Acknowledge upload after the user bubble, before the agent reply.
        history_after_upload = history + [
            {"role": "assistant",
             "content": "π Document received β I'll extract the relevant details when processing your complaint."}
        ]

    # --- call API ---
    try:
        resp = _api_send_message(sid, message)
    except requests.exceptions.ConnectionError:
        return _err(
            "β οΈ Cannot reach the G.U.I.D.E. backend at "
            "http://localhost:8000. Please start the API server."
        )
    except requests.exceptions.HTTPError as exc:
        # Backend maps LLM provider issues (overloaded / rate-limited /
        # timeout) to a friendly `detail` — show it as-is.
        return _err("β οΈ " + _http_error_detail(
            exc,
            "The assistant couldn't process that just now. "
            "Please try again in a moment.",
        ))
    except Exception as exc:
        return _err(f"β οΈ Something went wrong: {exc}")

    reply = resp.get("reply", "")
    pii_hit = resp.get("pii_redacted", False)
    badge = _BADGE_HTML if pii_hit else _BADGE_HIDDEN

    # Escape <ENTITY_TYPE> placeholders so the chatbot (markdown→HTML)
    # doesn't swallow them as bogus HTML tags.
    new_hist = history_after_upload + [
        {"role": "assistant", "content": _escape_placeholders(reply)},
    ]

    # --- try to populate HITL fields from reply (uses raw, unescaped reply) ---
    # Normalise a falsy parse result to {} so _field can always call .get().
    parsed = _parse_hitl_entities_json(reply) or {}
    if parsed:
        notice = gr.update(
            value=(
                "β **G.U.I.D.E. has extracted your complaint details.** \n"
                "Review the fields below, edit anything incorrect, "
                "then click **Confirm & Generate Draft**."
            )
        )
    else:
        notice = _nu()

    def _field(key: str, radio: bool = False):
        val = parsed.get(key)
        if val is None:
            return _nu()
        if radio:
            return _normalise_prior(val)
        return val

    # --- side-by-side redaction reveal ---
    reveal_html = _render_redaction_html(resp)
    reveal_accordion = gr.update(visible=bool(reveal_html), open=False)

    return (
        sid,
        new_hist,
        badge,
        notice,
        _field("provider"),
        _field("incident_date"),
        _field("amount"),
        _field("reference_id"),
        _field("prior_contact", radio=True),
        _field("desired_resolution"),
        reveal_accordion,
        reveal_html,
    )
| _respond_outputs = [ | |
| session_id, chatbot, privacy_badge, hitl_notice, | |
| entity_provider, entity_date, entity_amount, | |
| entity_ref_id, entity_prior_contact, entity_resolution, | |
| redaction_accordion, redaction_reveal, | |
| ] | |
| chat_input.submit( | |
| fn=_echo_user, | |
| inputs=[chat_input, chatbot], | |
| outputs=[chat_input, chatbot, pending_payload], | |
| show_progress="hidden", | |
| ).then( | |
| fn=_respond, | |
| inputs=[pending_payload, chatbot, session_id], | |
| outputs=_respond_outputs, | |
| # One slim progress bar on the chat only β not the duplicate animated | |
| # loader Gradio draws over every output component by default. | |
| show_progress="minimal", | |
| show_progress_on=[chatbot], | |
| ).then( | |
| # Keep the privacy audit trail live as the conversation progresses. | |
| fn=_refresh_audit, | |
| inputs=[session_id], | |
| outputs=[audit_log], | |
| show_progress="hidden", | |
| ) | |
| # ββ HITL confirm ββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Outputs (13): | |
| # chatbot | hitl_status | draft_output | escalation_md | hitl_notice | |
| # pii_tokens | pii_fill_section | |
| # pii_input_1 β¦ pii_input_6 | |
def _handle_confirm(
    sid: str,
    provider: str, date: str, amount: str,
    ref_id: str, prior: str, resolution: str,
    history: list,
):
    """Send the confirmed entities to the backend and present the draft.

    Args:
        sid: Active session id.
        provider, date, amount, ref_id, resolution: Values of the editable
            entity fields; blank (or missing) values are omitted from the
            request.
        prior: Radio value; ``"Yes"`` maps to ``prior_contact=True``.
        history: Current chat history.

    Returns:
        A 13-tuple of updates: chatbot, hitl_status, draft_output,
        escalation_md, hitl_notice, pii_tokens, pii_fill_section, then one
        update for each of the six PII fill-in textboxes. On failure the
        status shows the error and the fill-in form is hidden.
    """
    pii_inputs = [pii_input_1, pii_input_2, pii_input_3,
                  pii_input_4, pii_input_5, pii_input_6]

    def _hidden_input():
        return gr.update(visible=False, value="", label="")

    def _err(msg):
        return (
            history,
            gr.update(visible=True, value=msg),
            _nu(), _nu(), _nu(),
            [], gr.update(visible=False),
            *(_hidden_input() for _ in pii_inputs),
        )

    def _clean(value):
        # A field value may arrive as None; treat it as blank, not a crash.
        return (value or "").strip()

    if not sid:
        return _err("β οΈ No active session. Reload the page.")

    # Build entity dict — omit blanks, but always include prior_contact
    candidates = {
        "ORG": _clean(provider),
        "DATE": _clean(date),
        "AMOUNT": _clean(amount),
        "REF_ID": _clean(ref_id),
        "desired_resolution": _clean(resolution),
    }
    entities: dict = {key: val for key, val in candidates.items() if val}
    entities["prior_contact"] = prior == "Yes"

    try:
        resp = _api_validate_entities(sid, entities)
    except requests.exceptions.ConnectionError:
        return _err("β οΈ Cannot reach the backend.")
    except requests.exceptions.HTTPError as exc:
        return _err("β οΈ " + _http_error_detail(
            exc,
            "Couldn't generate the draft just now. Please try again in a moment.",
        ))
    except Exception as exc:
        return _err(f"β οΈ Error: {exc}")

    reply = resp.get("reply", "")
    # Chatbot renders markdown→HTML, so escape <ENTITY_TYPE> placeholders.
    new_hist = history + [{"role": "user", "content": "[Entities confirmed β ]"},
                          {"role": "assistant", "content": _escape_placeholders(reply)}]

    # Draft goes to a plain Textbox + .txt download — keep placeholders
    # literal (no escape) and strip Markdown so it reads as clean text.
    # The escalation guide is generated by a SEPARATE follow-up request
    # (_handle_escalation, chained via .then) to stay under the token cap,
    # so this turn returns the letter only.
    draft = _strip_markdown(_extract_draft(reply))
    notice_update = gr.update(
        value="β **Draft generated.** Generating the escalation guideβ¦"
    )

    # Detect remaining PII placeholders and configure fill-in form: one
    # visible textbox per token, the remaining slots hidden and cleared.
    tokens = _detect_placeholders(draft)
    input_updates = []
    for slot in range(len(pii_inputs)):
        if slot < len(tokens):
            token = tokens[slot]
            input_updates.append(gr.update(
                visible=True,
                label=_PII_LABEL_MAP.get(token, f"Your {token}"),
                value="",
            ))
        else:
            input_updates.append(_hidden_input())

    return (
        new_hist,
        # Keep the spinner alive — the escalation guide is still being fetched
        # by the chained step that follows this handler.
        gr.update(visible=True, value=_busy_html("Draft ready β fetching your escalation guideβ¦")),
        draft,
        gr.update(value="β³ Generating escalation guideβ¦"),
        notice_update,
        tokens,
        gr.update(visible=bool(tokens)),
        *input_updates,
    )
| # Second request: escalation guide. Chained after the draft returns so | |
| # the two never share one minute's token budget. | |
def _handle_escalation(sid: str, history: list):
    """Fetch the escalation guide in a follow-up request after the draft.

    Args:
        sid: Active session id.
        history: Current chat history.

    Returns:
        A 2-tuple ``(chat_history, escalation_md_update)``. On failure the
        history is returned unchanged and the markdown panel shows the error
        together with a hint to ask in the Chat tab.
    """
    def _unchanged_history(text):
        return history, gr.update(value=text)

    if not sid:
        return _unchanged_history(
            "Escalation guide unavailable β no active session. "
            'Ask G.U.I.D.E. "What should I do next?" in the Chat tab.'
        )

    # Optional pause between the draft request and this one.
    if _SPLIT_DELAY > 0:
        time.sleep(_SPLIT_DELAY)

    try:
        resp = _api_escalation_guide(sid)
    except requests.exceptions.HTTPError as exc:
        friendly = _http_error_detail(
            exc,
            "Couldn't fetch the escalation guide just now.",
        )
        return _unchanged_history(
            "β οΈ " + friendly
            + '\n\nYou can ask G.U.I.D.E. "What should I do next?" in the Chat tab.'
        )
    except Exception as exc:
        return _unchanged_history(
            f"β οΈ Escalation guide error: {exc}\n\n"
            'Ask G.U.I.D.E. "What should I do next?" in the Chat tab.'
        )

    reply = resp.get("reply", "")
    # Prefer the extracted escalation section; fall back to the whole reply.
    guide = _extract_escalation(reply) or reply
    guide_md = _escape_placeholders(guide) or (
        "Escalation recommendations were not returned by the agent. "
        'Ask G.U.I.D.E. "What should I do next?" in the Chat tab.'
    )
    assistant_turn = {"role": "assistant", "content": _escape_placeholders(reply)}
    return history + [assistant_turn], gr.update(value=guide_md)
| # Show the busy spinner the instant the button is clicked (this step | |
| # returns immediately), then run the slow draft + escalation steps, then | |
| # clear the spinner and re-enable the button. | |
def _confirm_busy():
    """Show the busy status and disable the confirm button.

    Returns updates for ``hitl_status`` and ``confirm_btn``, in that order.
    """
    status = gr.update(visible=True, value=_busy_html("Generating your complaint draftβ¦"))
    button = gr.update(interactive=False)
    return status, button
def _confirm_done(status_html: str):
    """Re-enable the confirm button and settle the status line.

    If *status_html* contains the warning marker, the status is left as it is
    so an error written by an earlier step stays visible. Otherwise the status
    is replaced with a completion note.

    Returns updates for ``hitl_status`` and ``confirm_btn``, in that order.
    """
    button_on = gr.update(interactive=True)
    shows_error = bool(status_html) and "β οΈ" in status_html
    if shows_error:
        return gr.update(), button_on
    done_note = gr.update(visible=True, value="β Draft ready β see the Complaint Draft tab.")
    return done_note, button_on
| confirm_btn.click( | |
| fn=_confirm_busy, | |
| inputs=None, | |
| outputs=[hitl_status, confirm_btn], | |
| show_progress="hidden", | |
| ).then( | |
| fn=_handle_confirm, | |
| inputs=[ | |
| session_id, | |
| entity_provider, entity_date, entity_amount, | |
| entity_ref_id, entity_prior_contact, entity_resolution, | |
| chatbot, | |
| ], | |
| outputs=[ | |
| chatbot, hitl_status, draft_output, escalation_md, hitl_notice, | |
| pii_tokens, pii_fill_section, | |
| pii_input_1, pii_input_2, pii_input_3, | |
| pii_input_4, pii_input_5, pii_input_6, | |
| ], | |
| show_progress="minimal", | |
| show_progress_on=[draft_output], | |
| ).then( | |
| fn=_handle_escalation, | |
| inputs=[session_id, chatbot], | |
| outputs=[chatbot, escalation_md], | |
| show_progress="minimal", | |
| show_progress_on=[escalation_md], | |
| ).then( | |
| fn=_confirm_done, | |
| inputs=[hitl_status], | |
| outputs=[hitl_status, confirm_btn], | |
| show_progress="hidden", | |
| ) | |
| # ββ Apply PII fill-in ββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _handle_apply_pii(draft, tokens, v1, v2, v3, v4, v5, v6):
    """Insert the user's real details into the draft in place of placeholders.

    Args:
        draft: Current letter text.
        tokens: Placeholder names (without angle brackets), positionally
            matched to the six fill-in values.
        v1..v6: Values of the six fill-in textboxes; blank ones are skipped.

    Returns:
        An update carrying the filled-in draft, or a no-change update when the
        draft is empty.
    """
    if not draft:
        return _nu()
    values = [v1, v2, v3, v4, v5, v6]
    for token, value in zip(tokens or [], values):
        if value and value.strip():
            # Literal replacement: the value is user text, so it must not be
            # treated as a regex replacement template (backslashes, \1, \g<>).
            draft = draft.replace(f"<{token}>", value.strip())
    return gr.update(value=draft)
| apply_pii_btn.click( | |
| fn=_handle_apply_pii, | |
| inputs=[draft_output, pii_tokens, | |
| pii_input_1, pii_input_2, pii_input_3, | |
| pii_input_4, pii_input_5, pii_input_6], | |
| outputs=[draft_output], | |
| ) | |
| # ββ Download draft βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Outputs (2): draft_file | pii_warn_banner | |
def _handle_download(draft_text: str):
    """Write the draft to a temporary .txt file and offer it for download.

    Args:
        draft_text: Current contents of the draft textbox.

    Returns:
        A 2-tuple of updates for ``draft_file`` and ``pii_warn_banner``. Both
        are hidden when the draft is empty. The banner is shown when the text
        still contains unfilled placeholders.
    """
    if not draft_text or not draft_text.strip():
        return gr.update(visible=False), gr.update(visible=False)

    remaining = _detect_placeholders(draft_text)
    if remaining:
        token_list = ", ".join(f"`<{t}>`" for t in remaining)
        warn = gr.update(
            visible=True,
            value=(
                f"β οΈ Your letter still contains unfilled placeholders: {token_list}. "
                "Fill them in above and click **βοΈ Apply & Preview** before sending."
            ),
        )
    else:
        warn = gr.update(visible=False)

    # delete=False keeps the file on disk after the handle is closed, since
    # its path is returned to the File component. The context manager makes
    # sure the handle is closed even if the write raises.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".txt",
        prefix="guide_complaint_",
        delete=False,
        encoding="utf-8",
    ) as tmp:
        tmp.write(draft_text)
    return gr.update(visible=True, value=tmp.name), warn
| download_btn.click( | |
| fn=_handle_download, | |
| inputs=[draft_output], | |
| outputs=[draft_file, pii_warn_banner], | |
| ) | |
| # ββ Download draft as PDF ββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Outputs (2): draft_file_pdf | pii_warn_banner | |
def _handle_download_pdf(draft_text: str):
    """Typeset the draft as a PDF in a temporary file and offer it for download.

    Args:
        draft_text: Current contents of the draft textbox.

    Returns:
        A 2-tuple of updates for ``draft_file_pdf`` and ``pii_warn_banner``.
        Both are hidden when the draft is empty. If PDF generation fails the
        file component is hidden and the banner carries the error message.
    """
    if not draft_text or not draft_text.strip():
        return gr.update(visible=False), gr.update(visible=False)

    remaining = _detect_placeholders(draft_text)
    if remaining:
        token_list = ", ".join(f"`<{t}>`" for t in remaining)
        warn = gr.update(
            visible=True,
            value=(
                f"β οΈ Your letter still contains unfilled placeholders: {token_list}. "
                "Fill them in above and click **βοΈ Apply & Preview** before sending."
            ),
        )
    else:
        warn = gr.update(visible=False)

    def _discard(path):
        # Best-effort removal of the placeholder file after a failed build.
        if path:
            try:
                os.unlink(path)
            except OSError:
                pass

    pdf_path = None
    try:
        # Reserve a unique path; the PDF builder writes to it afterwards.
        with tempfile.NamedTemporaryFile(
            suffix=".pdf", prefix="guide_complaint_", delete=False,
        ) as tmp:
            pdf_path = tmp.name
        _build_pdf(draft_text, pdf_path)
    except ImportError:
        _discard(pdf_path)
        return gr.update(visible=False), gr.update(
            visible=True,
            value="β οΈ PDF export needs the `reportlab` package. "
                  "Run `pip install reportlab`, or use **Download as .txt**.",
        )
    except Exception as exc:
        _discard(pdf_path)
        return gr.update(visible=False), gr.update(
            visible=True, value=f"β οΈ Could not generate PDF: {exc}",
        )
    return gr.update(visible=True, value=pdf_path), warn
| download_pdf_btn.click( | |
| fn=_handle_download_pdf, | |
| inputs=[draft_output], | |
| outputs=[draft_file_pdf, pii_warn_banner], | |
| ) | |
| # ββ Refresh privacy audit ββββββββββββββββββββββββββββββββββββββββββββββ | |
| refresh_audit_btn.click( | |
| fn=_refresh_audit, | |
| inputs=[session_id], | |
| outputs=[audit_log], | |
| ) | |
| return demo | |
if __name__ == "__main__":
    # theme + css are applied on gr.Blocks (in build_app), not on launch().
    app = build_app()
    # Port comes from GRADIO_SERVER_PORT, defaulting to 7860.
    port = int(os.getenv("GRADIO_SERVER_PORT", "7860"))
    app.launch(
        server_port=port,
        server_name="0.0.0.0",
        show_error=True,
        favicon_path=None,
    )