"""app.py — PrivacyShield Gradio firewall UI + orchestration (§5).
Layout: paste text -> Sanitize -> sanitized output + findings table +
"N blocked · 0 leaked" banner -> optional simulated LLM round-trip
(mask -> stub LLM -> restore) -> downloadable audit log (placeholders only).
The vault (placeholder -> original) lives only in gr.State (per-session,
in-memory) and is never written to the audit log.
"""
import json
import tempfile
# --- Workaround for a Gradio 4.44 + Python 3.13 bug: building the API schema
# crashes with "argument of type 'bool' is not iterable" on boolean JSON-schema
# nodes (additionalProperties: true). Patch the schema walker to treat bool
# schemas as "Any". Cosmetic only — affects the /info type hints, nothing else.
try:
    import gradio_client.utils as _gcu

    # Each patch is applied only if the target helper exists, so other
    # gradio_client versions that lack it are left untouched.
    if hasattr(_gcu, "_json_schema_to_python_type"):
        _orig_json_schema = _gcu._json_schema_to_python_type

        def _safe_json_schema(schema, defs=None):
            # A bare boolean schema node is reported as "Any"; everything
            # else is delegated to the original walker unchanged.
            return "Any" if isinstance(schema, bool) else _orig_json_schema(schema, defs)

        _gcu._json_schema_to_python_type = _safe_json_schema

    if hasattr(_gcu, "get_type"):
        _orig_get_type = _gcu.get_type

        def _safe_get_type(schema):
            # Only dict schemas are forwarded to the original get_type;
            # any other node type is reported as "Any".
            return _orig_get_type(schema) if isinstance(schema, dict) else "Any"

        _gcu.get_type = _safe_get_type
except Exception:
    # Best-effort by design: if the import or patching fails, continue
    # without the workaround rather than stopping the app from starting.
    pass
# --- end workaround ---
import gradio as gr
from firewall import audit_log, round_trip, sanitize
# Demo prompt loaded into the input box by the "Try: PII example" button.
# It mixes several kinds of personal data in one message: a person and company
# name, a phone number, an email address, Aadhaar and PAN identifiers, a card
# number, and a postal address.
PII_EXAMPLE = (
    "Hi team, this is Ramesh Kumar from Acme Traders Pvt Ltd. My number is "
    "9876543210 and email ramesh.kumar1@gmail.com. Please update my KYC — "
    "Aadhaar 2341 7634 8900, PAN ABCDE1234F. Refund my payment to card "
    "4532 0111 1222 2233 and ship the replacement to 12, MG Road, Pune, "
    "Maharashtra - 411001."
)
# Demo snippet loaded into the input box by the "Try: leaked-secret example"
# button. It imitates a config file containing an AWS access key pair, a JWT
# (written as three adjacent literals joined into one line), a Slack-style
# token, an admin email address, and a PEM-framed RSA private key block.
SECRET_EXAMPLE = (
    "# config.py — DO NOT COMMIT\n"
    'AWS_ACCESS_KEY_ID = "AKIAIOSFODNN7EXAMPLE"\n'
    'AWS_SECRET_ACCESS_KEY = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"\n'
    'JWT_TOKEN = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9'
    '.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ'
    '.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c"\n'
    'SLACK_WEBHOOK = "xoxb-1234567890-abcdefghijklmnop"\n'
    'ADMIN_EMAIL = "admin@acmetraders.com"\n'
    "-----BEGIN RSA PRIVATE KEY-----\n"
    "MIIBOgIBAAJBAK0z9k1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJ\n"
    "-----END RSA PRIVATE KEY-----"
)
# Column headers for the findings Dataframe; the order matches the per-finding
# row built by _findings_rows (type, masked_value, source, confidence).
FINDINGS_HEADERS = ["type", "masked value", "source", "confidence"]
def _findings_rows(findings: list[dict]) -> list[list]:
return [[f["type"], f["masked_value"], f["source"], f["confidence"]] for f in findings]
def _banner(result: dict) -> str:
note = ""
if result["model_status"] == "off":
note = " _(rules-only mode — fine-tuned model disabled)_"
elif result["model_status"] == "unavailable":
note = " _(rules-only mode — fine-tuned model unavailable, detectors still active)_"
return f"### 🛡️ {result['blocked']} items blocked · 0 leaked{note}"
def do_sanitize(text: str, pii: bool, secrets: bool, use_model: bool):
    """Handle the Sanitize button: run the firewall over the pasted text.

    Returns a 4-tuple for the wired outputs: sanitized text, findings table
    rows, banner Markdown, and the raw result dict that is kept in session
    state. Empty or whitespace-only input skips the firewall call and returns
    blank outputs with a zero-count banner and ``None`` as the state.
    """
    if text and text.strip():
        outcome = sanitize(text, pii=pii, secrets=secrets, use_model=use_model)
        masked_text = outcome["sanitized_text"]
        table = _findings_rows(outcome["findings"])
        headline = _banner(outcome)
        return masked_text, table, headline, outcome
    return "", [], "### 🛡️ 0 items blocked · 0 leaked", None
def do_round_trip(text: str, pii: bool, secrets: bool, use_model: bool):
    """Handle the round-trip button (mask -> stub LLM -> restore).

    Returns a 6-tuple for the wired outputs: sanitized prompt, LLM response,
    restored response, findings table rows, banner Markdown, and the raw
    result dict that is kept in session state. Empty or whitespace-only input
    skips the round trip and returns blank outputs with a zero-count banner
    and ``None`` as the state.
    """
    if text and text.strip():
        outcome = round_trip(text, pii=pii, secrets=secrets, use_model=use_model)
        masked_prompt = outcome["sanitized_text"]
        stub_reply = outcome["llm_response"]
        restored_reply = outcome["restored_response"]
        table = _findings_rows(outcome["findings"])
        headline = _banner(outcome)
        return masked_prompt, stub_reply, restored_reply, table, headline, outcome
    return "", "", "", [], "### 🛡️ 0 items blocked · 0 leaked", None
def do_download(result: dict | None):
if not result:
return None
with tempfile.NamedTemporaryFile(
mode="w", suffix=".json", prefix="privacyshield_audit_", delete=False, encoding="utf-8"
) as f:
json.dump(audit_log(result), f, indent=2)
path = f.name
return path
# Build the PrivacyShield UI and wire its events.
# NOTE(review): indentation was missing in the reviewed copy; the nesting of
# the Row/Accordion contexts below is reconstructed from the layout calls and
# should be confirmed against the original file.
with gr.Blocks(title="PrivacyShield") as demo:
    # Page header and tagline.
    gr.Markdown("# 🛡️ PrivacyShield — your local firewall for LLMs")
    gr.Markdown(
        "Strip PII & secrets before they ever reach an API. "
        "Nothing sensitive leaves your machine."
    )
    # Raw user input; read by both the Sanitize and round-trip handlers.
    input_text = gr.Textbox(
        label="Paste a prompt / code snippet", lines=8, placeholder="Paste text here…"
    )
    # One-click demo inputs (PII_EXAMPLE / SECRET_EXAMPLE).
    with gr.Row():
        pii_example_btn = gr.Button("Try: PII example")
        secret_example_btn = gr.Button("Try: leaked-secret example")
    # Toggles passed to the handlers as pii / secrets / use_model; all default to on.
    with gr.Row():
        pii_cb = gr.Checkbox(value=True, label="PII")
        secrets_cb = gr.Checkbox(value=True, label="Secrets")
        model_cb = gr.Checkbox(value=True, label="Use fine-tuned model")
    sanitize_btn = gr.Button("🛡️ Sanitize", variant="primary")
    # Initial banner text matches what the handlers return for empty input.
    banner = gr.Markdown("### 🛡️ 0 items blocked · 0 leaked")
    sanitized_output = gr.Textbox(
        label="Sanitized output (placeholders shown)", lines=8, interactive=False
    )
    findings = gr.Dataframe(headers=FINDINGS_HEADERS, label="Findings", interactive=False)
    # Optional round-trip demo, collapsed by default.
    with gr.Accordion("⚙️ Simulate LLM round-trip", open=False):
        gr.Markdown(
            "Sends the **sanitized** text to a stub LLM (templated echo, no API key "
            "needed) and restores the original values into the response."
        )
        round_trip_btn = gr.Button("Simulate LLM round-trip")
        rt_sanitized = gr.Textbox(label="1. Sanitized prompt -> LLM", lines=4, interactive=False)
        rt_llm = gr.Textbox(label="2. LLM (stub) response", lines=4, interactive=False)
        rt_restored = gr.Textbox(
            label="3. Response with originals RESTORED", lines=4, interactive=False
        )
    download_btn = gr.Button("⬇ Download audit log (JSON)")
    audit_file = gr.File(label="audit_log.json (placeholders only — never raw values)")
    # Holds the result dict from the most recent sanitize/round-trip run;
    # do_download reads it to build the audit log.
    state = gr.State()
    # Example buttons overwrite the input box with the canned demo text.
    pii_example_btn.click(lambda: PII_EXAMPLE, outputs=input_text)
    secret_example_btn.click(lambda: SECRET_EXAMPLE, outputs=input_text)
    # Output order must match the 4-tuple returned by do_sanitize.
    sanitize_btn.click(
        do_sanitize,
        inputs=[input_text, pii_cb, secrets_cb, model_cb],
        outputs=[sanitized_output, findings, banner, state],
    )
    # Output order must match the 6-tuple returned by do_round_trip; it shares
    # the findings table, banner and state with the Sanitize flow.
    round_trip_btn.click(
        do_round_trip,
        inputs=[input_text, pii_cb, secrets_cb, model_cb],
        outputs=[rt_sanitized, rt_llm, rt_restored, findings, banner, state],
    )
    download_btn.click(do_download, inputs=[state], outputs=[audit_file])
# Start the Gradio app only when this file is executed as a script, so that
# importing the module (and its `demo` object) does not launch a server.
if __name__ == "__main__":
    demo.launch()
|