"""app.py — PrivacyShield Gradio firewall UI + orchestration (§5).

Layout: paste text -> Sanitize -> sanitized output + findings table +
"N blocked · 0 leaked" banner -> optional simulated LLM round-trip
(mask -> stub LLM -> restore) -> downloadable audit log (placeholders only).

The vault (placeholder -> original) lives only in gr.State (per-session,
in-memory) and is never written to the audit log.
"""

import json
import logging
import tempfile

logger = logging.getLogger(__name__)

# --- Workaround for a Gradio 4.44 + Python 3.13 bug: building the API schema
# crashes with "argument of type 'bool' is not iterable" on boolean JSON-schema
# nodes (additionalProperties: true). Patch the schema walker to treat bool
# schemas as "Any". Cosmetic only — affects the /info type hints, nothing else.
try:
    import gradio_client.utils as _gcu

    if hasattr(_gcu, "_json_schema_to_python_type"):
        _orig_json_schema = _gcu._json_schema_to_python_type

        def _safe_json_schema(schema, defs=None):
            """Return "Any" for boolean schema nodes, else defer to gradio_client."""
            if isinstance(schema, bool):
                return "Any"
            return _orig_json_schema(schema, defs)

        _gcu._json_schema_to_python_type = _safe_json_schema

    if hasattr(_gcu, "get_type"):
        _orig_get_type = _gcu.get_type

        def _safe_get_type(schema):
            """Return "Any" for non-dict schema nodes, else defer to gradio_client."""
            if not isinstance(schema, dict):
                return "Any"
            return _orig_get_type(schema)

        _gcu.get_type = _safe_get_type
except Exception:
    # Best-effort patch: never block app start-up, but leave a trace so that a
    # later schema crash can be linked back to the patch not being applied.
    logger.warning(
        "Gradio API-schema workaround could not be applied; continuing without it",
        exc_info=True,
    )
# --- end workaround ---

# These imports intentionally follow the workaround above.
import gradio as gr  # noqa: E402

from firewall import audit_log, round_trip, sanitize  # noqa: E402

PII_EXAMPLE = (
    "Hi team, this is Ramesh Kumar from Acme Traders Pvt Ltd. My number is "
    "9876543210 and email ramesh.kumar1@gmail.com. Please update my KYC — "
    "Aadhaar 2341 7634 8900, PAN ABCDE1234F. Refund my payment to card "
    "4532 0111 1222 2233 and ship the replacement to 12, MG Road, Pune, "
    "Maharashtra - 411001."
)

SECRET_EXAMPLE = (
    "# config.py — DO NOT COMMIT\n"
    'AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE"\n'
    'AWS_SECRET_ACCESS_KEY = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"\n'
    'JWT_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
    '.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ'
    '.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"\n'
    'SLACK_WEBHOOK = "xoxb-1234567890-abcdefghijklmnop"\n'
    'ADMIN_EMAIL = "admin@acmetraders.com"\n'
    "-----BEGIN RSA PRIVATE KEY-----\n"
    "MIIBOgIBAAJBAK0z9k1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJ\n"
    "-----END RSA PRIVATE KEY-----"
)

FINDINGS_HEADERS = ["type", "masked value", "source", "confidence"]

# Banner shown before anything has been sanitized and whenever the input is blank.
EMPTY_BANNER = "### 🛡️ 0 items blocked · 0 leaked"


def _findings_rows(findings: list[dict]) -> list[list]:
    """Convert finding dicts into table rows ordered like FINDINGS_HEADERS.

    Each finding must provide the keys "type", "masked_value", "source" and
    "confidence"; a missing key raises KeyError.
    """
    return [
        [f["type"], f["masked_value"], f["source"], f["confidence"]]
        for f in findings
    ]


def _banner(result: dict) -> str:
    """Build the Markdown banner for a sanitize / round-trip result.

    Reads result["blocked"] for the count and result["model_status"] to decide
    whether a rules-only note is appended ("off" or "unavailable"); any other
    status produces no note.
    """
    status = result["model_status"]
    note = ""
    if status == "off":
        note = " _(rules-only mode — fine-tuned model disabled)_"
    elif status == "unavailable":
        note = " _(rules-only mode — fine-tuned model unavailable, detectors still active)_"
    return f"### 🛡️ {result['blocked']} items blocked · 0 leaked{note}"


def do_sanitize(
    text: str, pii: bool, secrets: bool, use_model: bool
) -> tuple[str, list[list], str, dict | None]:
    """Handle the Sanitize button.

    Returns (sanitized text, findings rows, banner markdown, result), matching
    the four outputs wired to the button. Blank or whitespace-only input skips
    the firewall call and clears the stored result (None).
    """
    if not text or not text.strip():
        return "", [], EMPTY_BANNER, None
    result = sanitize(text, pii=pii, secrets=secrets, use_model=use_model)
    return (
        result["sanitized_text"],
        _findings_rows(result["findings"]),
        _banner(result),
        result,
    )


def do_round_trip(
    text: str, pii: bool, secrets: bool, use_model: bool
) -> tuple[str, str, str, list[list], str, dict | None]:
    """Handle the "Simulate LLM round-trip" button.

    Returns (sanitized prompt, LLM response, restored response, findings rows,
    banner markdown, result), matching the six outputs wired to the button.
    Blank or whitespace-only input skips the firewall call and clears the
    stored result (None).
    """
    if not text or not text.strip():
        return "", "", "", [], EMPTY_BANNER, None
    result = round_trip(text, pii=pii, secrets=secrets, use_model=use_model)
    return (
        result["sanitized_text"],
        result["llm_response"],
        result["restored_response"],
        _findings_rows(result["findings"]),
        _banner(result),
        result,
    )


def do_download(result: dict | None) -> str | None:
    """Write the audit log for *result* to a temp JSON file and return its path.

    Returns None when there is no stored result. Only the output of
    audit_log(result) is serialized. The file is created with delete=False so
    it still exists after the handle closes and Gradio can serve it.
    """
    if not result:
        return None
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix=".json",
        prefix="privacyshield_audit_",
        delete=False,
        encoding="utf-8",
    ) as f:
        json.dump(audit_log(result), f, indent=2)
        path = f.name
    return path


with gr.Blocks(title="PrivacyShield") as demo:
    gr.Markdown("# 🛡️ PrivacyShield — your local firewall for LLMs")
    gr.Markdown(
        "Strip PII & secrets before they ever reach an API. "
        "Nothing sensitive leaves your machine."
    )

    input_text = gr.Textbox(
        label="Paste a prompt / code snippet", lines=8, placeholder="Paste text here…"
    )

    with gr.Row():
        pii_example_btn = gr.Button("Try: PII example")
        secret_example_btn = gr.Button("Try: leaked-secret example")

    with gr.Row():
        pii_cb = gr.Checkbox(value=True, label="PII")
        secrets_cb = gr.Checkbox(value=True, label="Secrets")
        model_cb = gr.Checkbox(value=True, label="Use fine-tuned model")

    sanitize_btn = gr.Button("🛡️ Sanitize", variant="primary")
    banner = gr.Markdown(EMPTY_BANNER)
    sanitized_output = gr.Textbox(
        label="Sanitized output (placeholders shown)", lines=8, interactive=False
    )
    findings = gr.Dataframe(headers=FINDINGS_HEADERS, label="Findings", interactive=False)

    with gr.Accordion("⚙️ Simulate LLM round-trip", open=False):
        gr.Markdown(
            "Sends the **sanitized** text to a stub LLM (templated echo, no API key "
            "needed) and restores the original values into the response."
        )
        round_trip_btn = gr.Button("Simulate LLM round-trip")
        rt_sanitized = gr.Textbox(label="1. Sanitized prompt -> LLM", lines=4, interactive=False)
        rt_llm = gr.Textbox(label="2. LLM (stub) response", lines=4, interactive=False)
        rt_restored = gr.Textbox(
            label="3. Response with originals RESTORED", lines=4, interactive=False
        )

    download_btn = gr.Button("⬇ Download audit log (JSON)")
    audit_file = gr.File(label="audit_log.json (placeholders only — never raw values)")

    # Holds the latest sanitize / round-trip result for this session so the
    # download handler can build the audit log from it.
    state = gr.State()

    pii_example_btn.click(lambda: PII_EXAMPLE, outputs=input_text)
    secret_example_btn.click(lambda: SECRET_EXAMPLE, outputs=input_text)

    sanitize_btn.click(
        do_sanitize,
        inputs=[input_text, pii_cb, secrets_cb, model_cb],
        outputs=[sanitized_output, findings, banner, state],
    )
    round_trip_btn.click(
        do_round_trip,
        inputs=[input_text, pii_cb, secrets_cb, model_cb],
        outputs=[rt_sanitized, rt_llm, rt_restored, findings, banner, state],
    )
    download_btn.click(do_download, inputs=[state], outputs=[audit_file])


if __name__ == "__main__":
    demo.launch()