safiaa02 committed on
Commit
6d14e0b
·
verified ·
1 Parent(s): b1c09b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from groq import Groq
import os

# IMPORTANT: Set your Groq API key in Hugging Face Spaces "Secrets"
# (the commented line below shows the variable name expected).
# os.environ["GROQ_API_KEY"] = "your_api_key_here"

# Module-level Groq client shared by all requests.
# NOTE(review): if GROQ_API_KEY is unset, os.environ.get returns None and the
# client is built with api_key=None — requests will fail later, at call time.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
9
+
10
def _complete(prompt):
    """Send a single-turn chat request to Groq and return the stripped reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama-3.1-8b-instant",
    )
    return response.choices[0].message.content.strip()


def analyze_prompt(user_prompt):
    """Analyze a prompt for LLM security risks and suggest a safer rewrite.

    Makes two Groq chat-completion calls: one to classify vulnerabilities
    (prompt injection, jailbreak, data leakage, etc.) and one to produce a
    safer reformulation of the same prompt.

    Args:
        user_prompt: The prompt text to analyze (may be None or empty, e.g.
            from a cleared Gradio textbox).

    Returns:
        A 4-tuple of strings: (original prompt, vulnerability analysis,
        safer reformulation, status message).
    """
    # Guard both None and whitespace-only input; the original crashed on None
    # because it called .strip() unconditionally.
    if not user_prompt or not user_prompt.strip():
        return "โš ๏ธ Please enter a prompt to analyze.", "", "", ""

    # Step 1: Detect vulnerability type.
    detection_prompt = f"""
You are an AI security analyst.
Analyze the following prompt for potential vulnerabilities against large language models.
Consider these categories:
- Prompt Injection
- Jailbreak / Safety Bypass
- Data Leakage Attempt
- Harmful or Offensive Request
- Hallucination Risk
- Safe / Low Risk

For each detected issue, provide:
- Detected Vulnerability Type(s)
- Risk Level (Low, Medium, High)
- Short Explanation (1โ€“2 sentences)

Prompt to analyze:
{user_prompt}
"""

    # Step 2: Suggest a safer reformulation of the prompt.
    rewrite_prompt = f"""
Rewrite the following prompt in a safer and responsible way
that removes any security risks but keeps the learning intent.

Prompt:
{user_prompt}
"""

    # Surface API failures (network errors, missing/invalid GROQ_API_KEY) in
    # the status field instead of crashing the Gradio handler.
    try:
        analysis = _complete(detection_prompt)
        safer_prompt = _complete(rewrite_prompt)
    except Exception as exc:
        return user_prompt, "", "", f"Error while contacting the model API: {exc}"

    return user_prompt, analysis, safer_prompt, "โœ… Analysis complete."
59
+
60
+
61
# ๐Ÿš€ Gradio UI
# Declarative layout: one input textbox, an analyze button, and four output
# textboxes wired to analyze_prompt via the button's click event.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## ๐Ÿ” LLM Vulnerability Tester")
    gr.Markdown(
        "Test your prompts for **AI security risks** (prompt injection, jailbreaks, data leakage, etc.).\n"
        "This tool provides a **risk analysis** and a **safer reformulation**."
    )

    # Input area: the prompt under test.
    with gr.Row():
        prompt_input = gr.Textbox(
            label="Enter Prompt to Test",
            placeholder="Type or paste your LLM prompt here...",
            lines=6
        )

    analyze_btn = gr.Button("๐Ÿ” Analyze Prompt")

    # Output area: echoes the original prompt alongside the analysis results.
    # NOTE(review): diff formatting was lossy — assuming all four outputs sit
    # inside this row; confirm against the deployed Space layout.
    with gr.Row():
        original_out = gr.Textbox(label="Original Prompt", lines=6)
        analysis_out = gr.Textbox(label="Vulnerability Analysis", lines=8)
        safer_out = gr.Textbox(label="Safer Reformulation", lines=6)
        status_out = gr.Textbox(label="Status", lines=1)

    # Wire the button: outputs map positionally to analyze_prompt's 4-tuple.
    analyze_btn.click(
        analyze_prompt,
        inputs=[prompt_input],
        outputs=[original_out, analysis_out, safer_out, status_out]
    )

# Launch the app when run as a script (Hugging Face Spaces entry point).
if __name__ == "__main__":
    demo.launch()