Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import json | |
| import re | |
# Keywords that classify_safety counts toward its toxicity score.
TOXIC_KEYWORDS = [
    "kill", "harm", "violent", "attack", "hate",
    "stupid", "idiot", "racist", "destroy", "weapon",
    "bomb", "poison", "abuse", "threat", "curse",
]

# Regular expressions (plain strings, searched with re.IGNORECASE by the
# callers in this file) for phrasings typical of prompt-injection attempts.
INJECTION_PATTERNS = [
    r"ignore (all|previous) instructions",
    r"disregard (the above|previous|your)",
    r"you are now (a|an) ",
    r"system prompt",
    r"\[system\]",
    r"act as (if you are|a different)",
    r"override (your|the) rules",
    r"forget (everything|all|your instructions)",
    r"new instructions:",
    r"jailbreak",
    r"DAN mode",
    r"developer mode",
    r"reveal (your|the) (system|prompt|instructions)",
]

# PII type name -> regular expression. The patterns are used with
# re.findall, so they must not contain capturing groups (otherwise findall
# would return the group contents instead of the whole match).
PII_PATTERNS = {
    # The TLD class used to be "[A-Z|a-z]", which also accepted a literal
    # "|" inside the top-level domain.
    "email": r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
    "phone": r"\b\d{3}[-.]?\d{3}[-.]?\d{4}\b",
    "ssn": r"\b\d{3}-\d{2}-\d{4}\b",
    "credit_card": r"\b(?:\d[ -]*?){13,16}\b",
    # Each octet is limited to 0-255 so strings such as "999.999.999.999"
    # are no longer reported as IP addresses.
    "ip_address": (
        r"\b(?:(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}"
        r"(?:25[0-5]|2[0-4]\d|[01]?\d\d?)\b"
    ),
}
def classify_safety(text: str) -> str:
    """Classify text for safety: toxicity, prompt injection, and PII.

    Args:
        text: The text to assess. ``None`` is treated as empty text.

    Returns:
        A JSON string (indent=2) with the keys ``is_safe``, ``toxicity``,
        ``prompt_injection``, ``pii_detected`` and ``recommendation``
        (``"ALLOW"`` when nothing was flagged, otherwise ``"BLOCK"``).
    """
    text = text or ""

    # Anchor every keyword at the start of a word. Plain substring matching
    # flagged harmless words ("skill" -> "kill", "whatever" -> "hate",
    # "pharmacy" -> "harm"); a leading boundary removes those while still
    # catching inflections such as "killing" or "attacked".
    toxic_hits = [
        kw
        for kw in TOXIC_KEYWORDS
        if re.search(r"\b" + re.escape(kw), text, re.IGNORECASE)
    ]
    # Each distinct keyword contributes one third, capped at 1.0, so the
    # 0.5 threshold is reached with two or more distinct keywords.
    toxicity_score = min(len(toxic_hits) / 3, 1.0)
    is_toxic = toxicity_score >= 0.5

    injection_hits = [
        p for p in INJECTION_PATTERNS if re.search(p, text, re.IGNORECASE)
    ]
    is_injection = bool(injection_hits)

    # PII type -> number of matches; types without matches are omitted.
    pii_found = {}
    for pii_type, pattern in PII_PATTERNS.items():
        matches = re.findall(pattern, text, re.IGNORECASE)
        if matches:
            pii_found[pii_type] = len(matches)
    has_pii = bool(pii_found)

    is_safe = not (is_toxic or is_injection or has_pii)
    return json.dumps({
        "is_safe": is_safe,
        "toxicity": {
            "detected": is_toxic,
            "score": round(toxicity_score, 2),
            "flagged_words": toxic_hits,
        },
        "prompt_injection": {
            "detected": is_injection,
            "matched_patterns": len(injection_hits),
        },
        "pii_detected": {"found": has_pii, "types": pii_found},
        "recommendation": "ALLOW" if is_safe else "BLOCK",
    }, indent=2)
def detect_pii(text: str) -> str:
    """Scan text for personally identifiable information.

    Every pattern in PII_PATTERNS is applied case-insensitively to ``text``.

    Returns:
        A JSON string (indent=2) with ``pii_found`` (bool) and ``types``,
        which maps each PII type that matched to its match ``count`` and
        up to three ``examples``.
    """
    report = {}
    for label in PII_PATTERNS:
        hits = re.findall(PII_PATTERNS[label], text, flags=re.IGNORECASE)
        if not hits:
            continue
        report[label] = {"count": len(hits), "examples": hits[:3]}

    payload = {"pii_found": bool(report), "types": report}
    return json.dumps(payload, indent=2)
# Two-tab Gradio UI: one tab per function, each with an input box, a button
# and a text box that shows the JSON string the function returns.
soft_blue = gr.themes.Soft(primary_hue="blue")

with gr.Blocks(theme=soft_blue, title="dispatchAI Safety Classifier") as demo:
    gr.Markdown("# 🛡️ dispatchAI On-Device Safety Classifier (MCP)")

    with gr.Tab("Safety Check"):
        safety_text = gr.Textbox(label="Text to check", lines=5)
        safety_button = gr.Button("Classify Safety", variant="primary")
        safety_report = gr.Textbox(label="Assessment (JSON)", lines=15)
        safety_button.click(
            fn=classify_safety,
            inputs=safety_text,
            outputs=safety_report,
        )

    with gr.Tab("PII Detection"):
        pii_text = gr.Textbox(label="Text to scan", lines=5)
        pii_button = gr.Button("Detect PII", variant="primary")
        pii_report = gr.Textbox(label="PII Report (JSON)", lines=12)
        pii_button.click(
            fn=detect_pii,
            inputs=pii_text,
            outputs=pii_report,
        )

    gr.Markdown("---\n🚀 [dispatchAI](https://huggingface.co/dispatchAI) — Small. Mobile. Free. UAE-built.")

# NOTE(review): mcp_server=True presumably also serves the wired functions as
# MCP tools and may need the MCP extra of Gradio installed -- confirm against
# the Gradio version pinned for this Space.
demo.launch(mcp_server=True)