kriti0608 committed on
Commit
2678c13
·
verified ·
1 Parent(s): be89fdb

Update src/repair.py

Browse files
Files changed (1) hide show
  1. src/repair.py +8 -30
src/repair.py CHANGED
@@ -1,37 +1,15 @@
1
  from dataclasses import dataclass
2
- from typing import Dict, Any
3
-
4
 
5
  @dataclass
6
- class RepairResult:
7
- repaired_text: str
8
- was_repaired: bool
9
- reason: str
10
-
11
-
12
  class RepairEngine:
13
- """
14
- Minimal repair engine.
15
- If the risk score is high, replace the output with a safe message.
16
- Otherwise return the original output unchanged.
17
- """
18
 
19
- def _init_(self, threshold: float = 0.5):
20
- self.threshold = threshold
21
-
22
- def repair(self, text: str, risk_score: float) -> RepairResult:
23
  if risk_score >= self.threshold:
24
- return RepairResult(
25
- repaired_text=(
26
- "⚠️ This response was flagged as unsafe. "
27
- "The content has been withheld according to safety policies."
28
- ),
29
- was_repaired=True,
30
- reason="High risk_score",
31
  )
32
-
33
- return RepairResult(
34
- repaired_text=text,
35
- was_repaired=False,
36
- reason="Below threshold",
37
- )
 
1
  from dataclasses import dataclass
 
 
2
 
3
  @dataclass
 
 
 
 
 
 
4
  class RepairEngine:
5
+ threshold: float = 0.5 # default threshold for repair
 
 
 
 
6
 
7
+ def repair(self, prompt: str, risk_score: float) -> str:
8
+ # Only repair if risk is high
 
 
9
  if risk_score >= self.threshold:
10
+ return (
11
+ "I can’t help with requests that bypass safety or policy. "
12
+ "If you tell me your legitimate goal, I can help in a safe way."
 
 
 
 
13
  )
14
+ # If risk is low, return a normal helpful response stub
15
+ return "Your prompt appears safe. How can I help you further?"