Rajan Sharma committed on
Commit d78ac98 · verified · 1 Parent(s): e2b82fa

Create safety.py

Files changed (1)
  1. safety.py +39 -0
safety.py ADDED
@@ -0,0 +1,39 @@
+ UNSAFE_PATTERNS = {
+     "violent_harm": ["kill", "murder", "assassinate", "hurt someone", "violently harm"],
+     "child_sexual": ["csam", "child sexual", "underage porn", "minor sexual"],
+     "sexual_explicit": ["explicit porn", "incest fetish", "graphic sexual"],
+     "self_harm": ["kill myself", "commit suicide", "how to self-harm", "end my life"],
+     "weapons_making": ["make a bomb", "build an explosive", "molotov", "ghost gun"],
+     "serious_illegal": ["credit card generator", "ssn list", "hack bank", "counterfeit money", "sell drugs"],
+     "dox_malware": ["doxx", "make ransomware", "build malware", "backdoor", "keylogger"],
+     "bio_chem": ["bioweapon", "chemical weapon", "create nerve agent"],
+ }
+
+ def _match_unsafe(text):
+     t = (text or "").lower()
+     for category, needles in UNSAFE_PATTERNS.items():
+         for n in needles:
+             if n in t:
+                 return category
+     return None
+
+ def safety_filter(text, mode="input"):
+     cat = _match_unsafe(text)
+     if cat:
+         return text, True, cat
+     return text, False, None
+
+ def refusal_reply(category):
+     reasons = {
+         "violent_harm": "violent harm",
+         "child_sexual": "sexual content involving minors",
+         "sexual_explicit": "explicit sexual content",
+         "self_harm": "self-harm",
+         "weapons_making": "weapon construction",
+         "serious_illegal": "illegal activity",
+         "dox_malware": "privacy or malware abuse",
+         "bio_chem": "biological or chemical harm",
+     }
+     reason = reasons.get(category, "unsafe content")
+     return (f"⚠️ I can’t help with {reason}. "
+             "I can offer safer, high-level context or alternatives if you’d like.")