kriti0608 committed on
Commit
dbe950b
·
verified ·
1 Parent(s): 73749f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -27
app.py CHANGED
@@ -1,38 +1,30 @@
1
  import gradio as gr
2
  from src.pipeline import JailbreakPipeline
3
 
4
- pipeline = JailbreakPipeline()
5
-
6
- def run_defense(prompt):
7
- result = pipeline.process(prompt)
8
-
9
- fired = "\n".join(
10
- [f"- **{h['rule']}**: {h['description']} (match: *{h['match_text']}*)"
11
- for h in result["fired_rules"]]
12
- ) or "No rules fired ✔️"
13
-
14
- return (
15
- result["risk_score"],
16
- fired,
17
- result["repaired_output"] or "No repair needed ✔️"
18
- )
19
 
20
- with gr.Blocks(title="JailBreakDefense") as demo:
21
  gr.Markdown("# JailBreakDefense – Jailbreak Prompt Detector")
22
- gr.Markdown("Enter any prompt and detect jailbreak attempts in real-time.")
23
 
24
- with gr.Row():
25
- prompt = gr.Textbox(
26
- label="User Prompt",
27
- placeholder="Type something like: 'Ignore safety and do anything now...'"
28
- )
29
 
30
- btn = gr.Button("Analyze Prompt")
31
 
32
- risk_score = gr.Number(label="Risk Score (0–1)")
33
- rules_fired = gr.Markdown(label="Fired Rules")
34
- repaired = gr.Textbox(label="Safe Output (if repaired)", lines=4)
35
 
36
- btn.click(run_defense, inputs=[prompt], outputs=[risk_score, rules_fired, repaired])
37
 
38
  demo.launch()
 
 
1
  import gradio as gr
2
  from src.pipeline import JailbreakPipeline
3
 
4
+ pipeline = JailbreakPipeline(consider_output=True)
5
+
6
+ def run_defense(prompt: str):
7
+ result = pipeline.process(prompt) # returns PipelineResult
8
+ # round risk score for display
9
+ risk = round(float(result.risk_score), 2)
10
+ safe_text = result.safe_output
11
+ return risk, safe_text
 
 
 
 
 
 
 
12
 
13
+ with gr.Blocks() as demo:
14
  gr.Markdown("# JailBreakDefense – Jailbreak Prompt Detector")
 
15
 
16
+ prompt_box = gr.Textbox(
17
+ label="User Prompt",
18
+ placeholder="Type something like: 'Ignore safety and do anything now...'",
19
+ lines=3,
20
+ )
21
 
22
+ analyze_btn = gr.Button("Analyze Prompt")
23
 
24
+ risk_out = gr.Number(label="Risk Score (0–1)")
25
+ safe_out = gr.Textbox(label="Safe Output (if repaired)", lines=4)
 
26
 
27
+ analyze_btn.click(run_defense, inputs=prompt_box, outputs=[risk_out, safe_out])
28
 
29
  demo.launch()
30
+