Spaces:

kriti0608
/

JailBreakDefense

Sleeping

kriti0608 committed 18 days ago

Commit

2678c13

verified ·

1 Parent(s): be89fdb

Update src/repair.py

Files changed (1) hide show

src/repair.py CHANGED Viewed

@@ -1,37 +1,15 @@
 from dataclasses import dataclass
-from typing import Dict, Any
 @dataclass
-class RepairResult:
-    repaired_text: str
-    was_repaired: bool
-    reason: str
 class RepairEngine:
-    """
-    Minimal repair engine.
-    If the risk score is high, replace the output with a safe message.
-    Otherwise return the original output unchanged.
-    """
-    def _init_(self, threshold: float = 0.5):
-        self.threshold = threshold
-    def repair(self, text: str, risk_score: float) -> RepairResult:
         if risk_score >= self.threshold:
-            return RepairResult(
-                repaired_text=(
-                    "⚠️ This response was flagged as unsafe. "
-                    "The content has been withheld according to safety policies."
-                ),
-                was_repaired=True,
-                reason="High risk_score",
             )
-        return RepairResult(
-            repaired_text=text,
-            was_repaired=False,
-            reason="Below threshold",
-        )

 from dataclasses import dataclass
 @dataclass
 class RepairEngine:
+    threshold: float = 0.5  # default threshold for repair
+    def repair(self, prompt: str, risk_score: float) -> str:
+        # Only repair if risk is high
         if risk_score >= self.threshold:
+            return (
+                "I can’t help with requests that bypass safety or policy. "
+                "If you tell me your legitimate goal, I can help in a safe way."
             )
+        # If risk is low, return a normal helpful response stub
+        return "Your prompt appears safe. How can I help you further?"