PrivacyShield / app.py
perceptron01's picture
Upload app.py
7ac0307 verified
Raw
History Blame Contribute Delete
6.8 kB
"""app.py — PrivacyShield Gradio firewall UI + orchestration (§5).
Layout: paste text -> Sanitize -> sanitized output + findings table +
"N blocked · 0 leaked" banner -> optional simulated LLM round-trip
(mask -> stub LLM -> restore) -> downloadable audit log (placeholders only).
The vault (placeholder -> original) lives only in gr.State (per-session,
in-memory) and is never written to the audit log.
"""
import json
import tempfile
# --- Workaround for a Gradio 4.44 + Python 3.13 bug: while building the API
# schema, gradio_client crashes with "argument of type 'bool' is not iterable"
# when it meets a boolean JSON-schema node (additionalProperties: true).
# The schema helpers are wrapped below so that such nodes are reported as "Any".
# Cosmetic only — affects the /info type hints, nothing else.
try:
    import gradio_client.utils as _gcu

    if hasattr(_gcu, "_json_schema_to_python_type"):
        _orig_json_schema = _gcu._json_schema_to_python_type

        def _safe_json_schema(schema, defs=None):
            # A bare True/False schema has no keys to inspect; report it as "Any"
            # and hand every other schema to the original implementation.
            return "Any" if isinstance(schema, bool) else _orig_json_schema(schema, defs)

        _gcu._json_schema_to_python_type = _safe_json_schema

    if hasattr(_gcu, "get_type"):
        _orig_get_type = _gcu.get_type

        def _safe_get_type(schema):
            # Only dict schemas are forwarded to the original get_type.
            if isinstance(schema, dict):
                return _orig_get_type(schema)
            return "Any"

        _gcu.get_type = _safe_get_type
except Exception:
    # Best effort: if the patch cannot be applied, continue without it.
    pass
# --- end workaround ---
import gradio as gr
from firewall import audit_log, round_trip, sanitize
# Sample customer message for the "Try: PII example" button. It mentions a
# name, company, phone number, email, Aadhaar, PAN, card number and address.
PII_EXAMPLE = (
    "Hi team, this is Ramesh Kumar from Acme Traders Pvt Ltd. "
    "My number is 9876543210 and email ramesh.kumar1@gmail.com. "
    "Please update my KYC — Aadhaar 2341 7634 8900, PAN ABCDE1234F. "
    "Refund my payment to card 4532 0111 1222 2233 and ship the replacement "
    "to 12, MG Road, Pune, Maharashtra - 411001."
)

# Sample config snippet for the "Try: leaked-secret example" button.
# One list entry per output line; lines are joined with "\n" and there is no
# trailing newline.
SECRET_EXAMPLE = "\n".join(
    [
        "# config.py — DO NOT COMMIT",
        'AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE"',
        'AWS_SECRET_ACCESS_KEY = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"',
        (
            'JWT_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
            ".eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ"
            '.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"'
        ),
        'SLACK_WEBHOOK = "xoxb-1234567890-abcdefghijklmnop"',
        'ADMIN_EMAIL = "admin@acmetraders.com"',
        "-----BEGIN RSA PRIVATE KEY-----",
        "MIIBOgIBAAJBAK0z9k1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJ",
        "-----END RSA PRIVATE KEY-----",
    ]
)

# Column headers of the findings table, in display order.
FINDINGS_HEADERS = [
    "type",
    "masked value",
    "source",
    "confidence",
]
def _findings_rows(findings: list[dict]) -> list[list]:
    """Turn finding dicts into rows for the findings table.

    Each row holds the finding's ``type``, ``masked_value``, ``source`` and
    ``confidence`` values, in that order. Any other keys are ignored; a
    missing key raises ``KeyError``.
    """
    columns = ("type", "masked_value", "source", "confidence")
    return [[finding[key] for key in columns] for finding in findings]
def _banner(result: dict) -> str:
    """Build the Markdown status banner for a sanitize / round-trip result.

    Reads ``result["model_status"]`` and ``result["blocked"]``. The "0 leaked"
    part is a fixed literal; it is not computed from ``result``. A note is
    appended when the model status is ``"off"`` or ``"unavailable"``; any
    other status yields the plain headline.
    """
    status = result["model_status"]
    headline = f"### 🛡️ {result['blocked']} items blocked · 0 leaked"
    if status == "off":
        return headline + " _(rules-only mode — fine-tuned model disabled)_"
    if status == "unavailable":
        return (
            headline
            + " _(rules-only mode — fine-tuned model unavailable, detectors still active)_"
        )
    return headline
def do_sanitize(text: str, pii: bool, secrets: bool, use_model: bool):
    """Handle the Sanitize button: mask the input and summarise the findings.

    Returns a 4-tuple ``(sanitized_text, findings_rows, banner_markdown,
    result)``. For empty or whitespace-only input (including ``None``) the
    firewall is not called and the tuple holds an empty string, an empty
    list, the zero-count banner and ``None``.
    """
    has_content = bool(text) and bool(text.strip())
    if has_content:
        outcome = sanitize(text, pii=pii, secrets=secrets, use_model=use_model)
        return (
            outcome["sanitized_text"],
            _findings_rows(outcome["findings"]),
            _banner(outcome),
            outcome,
        )
    return "", [], "### 🛡️ 0 items blocked · 0 leaked", None
def do_round_trip(text: str, pii: bool, secrets: bool, use_model: bool):
    """Handle the round-trip button: run the simulated LLM round-trip.

    Returns a 6-tuple ``(sanitized_text, llm_response, restored_response,
    findings_rows, banner_markdown, result)``. For empty or whitespace-only
    input (including ``None``) the firewall is not called and the tuple holds
    three empty strings, an empty list, the zero-count banner and ``None``.
    """
    has_content = bool(text) and bool(text.strip())
    if has_content:
        outcome = round_trip(text, pii=pii, secrets=secrets, use_model=use_model)
        masked_prompt = outcome["sanitized_text"]
        stub_reply = outcome["llm_response"]
        restored_reply = outcome["restored_response"]
        table_rows = _findings_rows(outcome["findings"])
        return masked_prompt, stub_reply, restored_reply, table_rows, _banner(outcome), outcome
    return "", "", "", [], "### 🛡️ 0 items blocked · 0 leaked", None
def do_download(result: dict | None):
if not result:
return None
with tempfile.NamedTemporaryFile(
mode="w", suffix=".json", prefix="privacyshield_audit_", delete=False, encoding="utf-8"
) as f:
json.dump(audit_log(result), f, indent=2)
path = f.name
return path
# UI definition. Components are created in display order; the event wiring at
# the bottom connects the buttons to the do_* handlers defined above.
with gr.Blocks(title="PrivacyShield") as demo:
    gr.Markdown("# 🛡️ PrivacyShield — your local firewall for LLMs")
    gr.Markdown(
        "Strip PII & secrets before they ever reach an API. "
        "Nothing sensitive leaves your machine."
    )
    # Input area plus two buttons that pre-fill it with the sample texts.
    input_text = gr.Textbox(
        label="Paste a prompt / code snippet", lines=8, placeholder="Paste text here…"
    )
    with gr.Row():
        pii_example_btn = gr.Button("Try: PII example")
        secret_example_btn = gr.Button("Try: leaked-secret example")
    # Detector toggles; their values are passed to both handlers as
    # (pii, secrets, use_model).
    with gr.Row():
        pii_cb = gr.Checkbox(value=True, label="PII")
        secrets_cb = gr.Checkbox(value=True, label="Secrets")
        model_cb = gr.Checkbox(value=True, label="Use fine-tuned model")
    sanitize_btn = gr.Button("🛡️ Sanitize", variant="primary")
    # Result area: banner, sanitized text and findings table (all read-only).
    banner = gr.Markdown("### 🛡️ 0 items blocked · 0 leaked")
    sanitized_output = gr.Textbox(
        label="Sanitized output (placeholders shown)", lines=8, interactive=False
    )
    findings = gr.Dataframe(headers=FINDINGS_HEADERS, label="Findings", interactive=False)
    # Optional round-trip demo, collapsed by default.
    with gr.Accordion("⚙️ Simulate LLM round-trip", open=False):
        gr.Markdown(
            "Sends the **sanitized** text to a stub LLM (templated echo, no API key "
            "needed) and restores the original values into the response."
        )
        round_trip_btn = gr.Button("Simulate LLM round-trip")
        rt_sanitized = gr.Textbox(label="1. Sanitized prompt -> LLM", lines=4, interactive=False)
        rt_llm = gr.Textbox(label="2. LLM (stub) response", lines=4, interactive=False)
        rt_restored = gr.Textbox(
            label="3. Response with originals RESTORED", lines=4, interactive=False
        )
    download_btn = gr.Button("⬇ Download audit log (JSON)")
    audit_file = gr.File(label="audit_log.json (placeholders only — never raw values)")
    # Holds the last element returned by do_sanitize / do_round_trip (the
    # result dict, or None for blank input); do_download reads it.
    state = gr.State()
    # --- event wiring ---
    pii_example_btn.click(lambda: PII_EXAMPLE, outputs=input_text)
    secret_example_btn.click(lambda: SECRET_EXAMPLE, outputs=input_text)
    sanitize_btn.click(
        do_sanitize,
        inputs=[input_text, pii_cb, secrets_cb, model_cb],
        outputs=[sanitized_output, findings, banner, state],
    )
    # The round-trip also overwrites the shared findings table, banner and
    # state, but not the main sanitized_output box.
    round_trip_btn.click(
        do_round_trip,
        inputs=[input_text, pii_cb, secrets_cb, model_cb],
        outputs=[rt_sanitized, rt_llm, rt_restored, findings, banner, state],
    )
    download_btn.click(do_download, inputs=[state], outputs=[audit_file])
# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()