aika42 committed on
Commit
fc7991c
·
verified ·
1 Parent(s): 2e165ca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -37
app.py CHANGED
@@ -7,25 +7,44 @@ import plotly.graph_objects as go
7
 
8
  def extract_scores_and_verdict(result_text):
9
  scores = {}
 
 
10
  patterns = {
11
  "Clarity": r"[-–•\s]*[^\w]?Clarity:\s*(\d)/5",
12
  "Context": r"[-–•\s]*[^\w]?Context:\s*(\d)/5",
13
  "Specificity": r"[-–•\s]*[^\w]?Specificity:\s*(\d)/5",
14
  "Intent Alignment": r"[-–•\s]*[^\w]?Intent Alignment:\s*(\d)/5"
15
  }
 
16
  for key, pattern in patterns.items():
17
  match = re.search(pattern, result_text)
18
- scores[key] = int(match.group(1)) if match else 0
 
 
 
 
 
 
 
19
  verdict_match = re.search(r"Verdict:\s*(✅|⚠️|🚫)", result_text)
20
  verdict_emoji = verdict_match.group(1) if verdict_match else "✅"
 
21
  return scores, verdict_emoji
22
 
 
23
  def generate_radar_plot(scores, verdict_emoji):
24
  categories = list(scores.keys())
25
  values = list(scores.values())
26
- values.append(values[0]) # loop
27
- colors = { "✅": "green", "⚠️": "orange", "🚫": "red" }
 
 
 
 
 
 
28
  fig = go.Figure()
 
29
  fig.add_trace(go.Scatterpolar(
30
  r=values,
31
  theta=categories + [categories[0]],
@@ -34,28 +53,65 @@ def generate_radar_plot(scores, verdict_emoji):
34
  line=dict(color=colors.get(verdict_emoji, "gray")),
35
  marker=dict(size=8)
36
  ))
 
37
  fig.update_layout(
38
- polar=dict(radialaxis=dict(visible=True, range=[0, 5])),
 
 
39
  showlegend=False,
40
  title="🔎 Prompt Evaluation Radar"
41
  )
42
  return fig
43
 
 
44
  HF_API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
45
  HF_TOKEN = os.environ.get("HF_PROJECT_TOKEN")
46
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
47
 
48
- PROMPT_TEMPLATE = """You are a prompt evaluation assistant called PromptPolice...
49
- [unchanged template for brevity]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  """
51
 
 
52
  @st.cache_data(show_spinner=False)
53
  def evaluate_prompt(user_prompt):
54
  payload = {
55
- "messages": [{"role": "user", "content": PROMPT_TEMPLATE.format(user_prompt=user_prompt)}],
 
 
56
  "model": "deepseek/deepseek-r1-turbo",
57
- "temperature": 0.7,
58
- "stream": False
59
  }
60
  response = requests.post(HF_API_URL, headers=HEADERS, json=payload)
61
  if response.status_code == 200:
@@ -63,10 +119,9 @@ def evaluate_prompt(user_prompt):
63
  else:
64
  return f"Error: {response.status_code} - {response.text}"
65
 
66
- # ----------- UI STARTS HERE -------------
67
- st.set_page_config("PromptPolice", page_icon="🚓", layout="wide")
68
-
69
- # Custom styling
70
  st.markdown("""
71
  <style>
72
  .main {background-color: #0f1117; color: #f0f0f0; font-family: 'Segoe UI', sans-serif;}
@@ -79,39 +134,52 @@ st.markdown("""
79
  st.title("🚓 PromptPolice")
80
  st.caption("Evaluate your prompts like a seasoned detective.")
81
 
82
- col1, col2 = st.columns([1, 2], gap="large")
83
-
84
- with col1:
85
- user_input = st.text_area("Paste your prompt here:", height=300, placeholder="e.g. Generate a short story about a robot in a post-apocalyptic world.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- if st.button(":mag_right: Evaluate Prompt"):
88
- if not HF_TOKEN:
89
- st.error("Missing Hugging Face token. Please set HF_PROJECT_TOKEN as environment variable.")
90
- elif user_input.strip() == "":
91
- st.warning("Please enter a prompt to evaluate.")
92
- else:
93
- with st.spinner("Evaluating prompt with PromptPolice..."):
94
- result = evaluate_prompt(user_input)
95
- st.session_state["result"] = result
96
- st.session_state["scores"], st.session_state["verdict"] = extract_scores_and_verdict(result)
97
-
98
- # Right column — show result only if evaluated
99
- if "result" in st.session_state:
100
- with col2:
101
- st.subheader(":bar_chart: Radar Score")
102
- radar_fig = generate_radar_plot(st.session_state["scores"], st.session_state["verdict"])
103
  st.plotly_chart(radar_fig, use_container_width=True)
104
 
 
 
105
  st.markdown("<a name='result'></a>", unsafe_allow_html=True)
106
- st.subheader(":scroll: Evaluation Result")
107
 
 
108
  st.markdown(f"""
109
  <div style='background-color:#1e1e1e; color:#f0f0f0; padding:20px; border-radius:10px; border: 1px solid #444; font-family: monospace; font-size: 15px;'>
110
- <pre style='white-space:pre-wrap; color:#f0f0f0;'>{st.session_state["result"]}</pre>
111
  </div>
112
  """, unsafe_allow_html=True)
113
 
114
- st.download_button("Download Evaluation", st.session_state["result"], file_name="evaluation.txt")
 
 
 
115
  st.toast("✅ Evaluation complete!", icon="🤖")
116
  st.balloons()
117
  st.markdown("""
@@ -132,4 +200,4 @@ st.markdown("""
132
  Made with ❤️ by <b>Penguins</b> · Powered by <code>DeepSeek R1 Turbo</code><br>
133
  No data stored · No nonsense · Just prompt justice ⚖️
134
  </center>
135
- """, unsafe_allow_html=True)
 
7
 
8
  def extract_scores_and_verdict(result_text):
9
  scores = {}
10
+
11
+ # Match categories based on keywords (not emojis)
12
  patterns = {
13
  "Clarity": r"[-–•\s]*[^\w]?Clarity:\s*(\d)/5",
14
  "Context": r"[-–•\s]*[^\w]?Context:\s*(\d)/5",
15
  "Specificity": r"[-–•\s]*[^\w]?Specificity:\s*(\d)/5",
16
  "Intent Alignment": r"[-–•\s]*[^\w]?Intent Alignment:\s*(\d)/5"
17
  }
18
+
19
  for key, pattern in patterns.items():
20
  match = re.search(pattern, result_text)
21
+ if match:
22
+ scores[key] = int(match.group(1))
23
+ else:
24
+ scores[key] = 0
25
+
26
+ # Match verdict based on emoji OR fallback to keyword
27
+
28
+ #verdict_match = re.search(r"Verdict: (✅|⚠️|🚫)", result_text)
29
  verdict_match = re.search(r"Verdict:\s*(✅|⚠️|🚫)", result_text)
30
  verdict_emoji = verdict_match.group(1) if verdict_match else "✅"
31
+
32
  return scores, verdict_emoji
33
 
34
+
35
  def generate_radar_plot(scores, verdict_emoji):
36
  categories = list(scores.keys())
37
  values = list(scores.values())
38
+ values.append(values[0]) # close the loop for radar
39
+
40
+ colors = {
41
+ "✅": "green",
42
+ "⚠️": "orange",
43
+ "🚫": "red"
44
+ }
45
+
46
  fig = go.Figure()
47
+
48
  fig.add_trace(go.Scatterpolar(
49
  r=values,
50
  theta=categories + [categories[0]],
 
53
  line=dict(color=colors.get(verdict_emoji, "gray")),
54
  marker=dict(size=8)
55
  ))
56
+
57
  fig.update_layout(
58
+ polar=dict(
59
+ radialaxis=dict(visible=True, range=[0, 5]),
60
+ ),
61
  showlegend=False,
62
  title="🔎 Prompt Evaluation Radar"
63
  )
64
  return fig
65
 
66
+ # Environment Setup
67
  HF_API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
68
  HF_TOKEN = os.environ.get("HF_PROJECT_TOKEN")
69
  HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
70
 
71
+ # Prompt Template
72
+ PROMPT_TEMPLATE = """
73
+ You are a prompt evaluation assistant called PromptPolice. Evaluate the following user prompt based on the criteria below. For each, rate from 1 (poor) to 5 (excellent), explain why, and suggest specific improvements.
74
+
75
+ Prompt:
76
+
77
+ {user_prompt}
78
+
79
+ Evaluation Criteria (1–5):
80
+ - 🔍 Clarity: Is the prompt easy to understand?
81
+ - 🧠 Context: Does the prompt provide enough background or situational framing?
82
+ - 🎯 Specificity: Are the goals or constraints clearly defined?
83
+ - 🏛️ Intent Alignment: Is it clear what task or behavior the prompt is meant to elicit?
84
+ Verdict Logic:
85
+ - ✅ **Pass**: Overall Score is 16 or above AND no criterion rated below 3.
86
+ - ⚠️ **Warning**: Score between 11–15 OR one rating below 3.
87
+ - 🚫 **Ticket**: Score 10 or below OR two or more ratings below 3.
88
+ Output Format:
89
+ Evaluation:
90
+ - 🔍 Clarity: X/5 — [Explanation]
91
+ - 🧠 Context: X/5 — [Explanation]
92
+ - 🎯 Specificity: X/5 — [Explanation]
93
+ - 🏛️ Intent Alignment: X/5 — [Explanation]
94
+ Overall Score: X/20
95
+ Flaw Summary: [One-line summary of the weakest point]
96
+ 🛡️ Verdict: [✅ Pass / ⚠️ Warning / 🚫 Ticket] — [Justification based on score and flaw]
97
+ Prompt Type(s): [e.g., Instruction, Summarization, Query, Roleplay, Classification, Creative Writing, Other]
98
+ Suggestions:
99
+ - [Actionable suggestion 1]
100
+ - [Actionable suggestion 2]
101
+ Improved Prompt:
102
+ "[Rewritten version of the user prompt]"
103
  """
104
 
105
+ # Function to query DeepSeek
106
  @st.cache_data(show_spinner=False)
107
  def evaluate_prompt(user_prompt):
108
  payload = {
109
+ "messages": [
110
+ {"role": "user", "content": PROMPT_TEMPLATE.format(user_prompt=user_prompt)}
111
+ ],
112
  "model": "deepseek/deepseek-r1-turbo",
113
+ "temperature": 0.7, # Required for HF's OpenAI-compatible endpoint
114
+ "stream": False # Explicitly set streaming
115
  }
116
  response = requests.post(HF_API_URL, headers=HEADERS, json=payload)
117
  if response.status_code == 200:
 
119
  else:
120
  return f"Error: {response.status_code} - {response.text}"
121
 
122
+
123
+ # App UI
124
+ st.set_page_config("PromptPolice", page_icon="🚓", layout="centered")
 
125
  st.markdown("""
126
  <style>
127
  .main {background-color: #0f1117; color: #f0f0f0; font-family: 'Segoe UI', sans-serif;}
 
134
  st.title("🚓 PromptPolice")
135
  st.caption("Evaluate your prompts like a seasoned detective.")
136
 
137
+ # Sidebar
138
+ with st.sidebar:
139
+ st.header("🛠 Prompt Tools")
140
+ use_example = st.toggle("Load Example Prompt")
141
+ st.markdown("---")
142
+ st.info("Paste a natural language prompt and get an instant evaluation. No fluff.", icon="📌")
143
+
144
+ # Main Input
145
+ if use_example:
146
+ user_input = st.text_area("Paste your prompt here:",
147
+ "Generate a short story about a robot in a post-apocalyptic world.",
148
+ height=200)
149
+ else:
150
+ user_input = st.text_area("Paste your prompt here:", height=200)
151
+
152
+ # Evaluate Button
153
+ if st.button(":mag_right: Evaluate Prompt"):
154
+ if not HF_TOKEN:
155
+ st.error("Missing Hugging Face token. Please set HF_PROJECT_TOKEN as environment variable.")
156
+ elif user_input.strip() == "":
157
+ st.warning("Please enter a prompt to evaluate.")
158
+ else:
159
+ with st.spinner("Evaluating prompt with PromptPolice..."):
160
+ result = evaluate_prompt(user_input)
161
 
162
+ st.markdown("---")
163
+ st.subheader(":clipboard: Evaluation Result")
164
+ scores, verdict_emoji = extract_scores_and_verdict(result)
165
+ radar_fig = generate_radar_plot(scores, verdict_emoji)
 
 
 
 
 
 
 
 
 
 
 
 
166
  st.plotly_chart(radar_fig, use_container_width=True)
167
 
168
+
169
+ # Add scroll anchor
170
  st.markdown("<a name='result'></a>", unsafe_allow_html=True)
 
171
 
172
+ # Render result with dark theme-friendly style
173
  st.markdown(f"""
174
  <div style='background-color:#1e1e1e; color:#f0f0f0; padding:20px; border-radius:10px; border: 1px solid #444; font-family: monospace; font-size: 15px;'>
175
+ <pre style='white-space:pre-wrap; color:#f0f0f0;'>{result}</pre>
176
  </div>
177
  """, unsafe_allow_html=True)
178
 
179
+ # Download button
180
+ st.download_button("Download Evaluation", result, file_name="evaluation.txt")
181
+
182
+ # Toast + Balloons + Auto-scroll
183
  st.toast("✅ Evaluation complete!", icon="🤖")
184
  st.balloons()
185
  st.markdown("""
 
200
  Made with ❤️ by <b>Penguins</b> · Powered by <code>DeepSeek R1 Turbo</code><br>
201
  No data stored · No nonsense · Just prompt justice ⚖️
202
  </center>
203
+ """, unsafe_allow_html=True)