# app.py — PromptPolice: a Streamlit app that asks an LLM to grade a user
# prompt on four criteria (clarity, context, specificity, intent alignment)
# and visualizes the scores as a radar chart with a pass/warn/ticket verdict.
import streamlit as st
import requests
import os
import re
import plotly.graph_objects as go


def extract_scores_and_verdict(result_text):
    """Parse the model's evaluation text into per-criterion scores and a verdict.

    Args:
        result_text: Raw text returned by the evaluation model.

    Returns:
        (scores, verdict_emoji): ``scores`` maps each criterion name to an int
        (0 when the expected "X/5" line is absent); ``verdict_emoji`` is one of
        "✅" / "⚠️" / "🚫", defaulting to "✅" when no verdict line is found.
    """
    scores = {}
    # The leading [-–•\s]*[^\w]? tolerates bullet characters and a single
    # emoji/symbol the model may emit before each criterion label.
    patterns = {
        "Clarity": r"[-–•\s]*[^\w]?Clarity:\s*(\d)/5",
        "Context": r"[-–•\s]*[^\w]?Context:\s*(\d)/5",
        "Specificity": r"[-–•\s]*[^\w]?Specificity:\s*(\d)/5",
        "Intent Alignment": r"[-–•\s]*[^\w]?Intent Alignment:\s*(\d)/5",
    }
    for key, pattern in patterns.items():
        match = re.search(pattern, result_text)
        scores[key] = int(match.group(1)) if match else 0
    verdict_match = re.search(r"Verdict:\s*(✅|⚠️|🚫)", result_text)
    verdict_emoji = verdict_match.group(1) if verdict_match else "✅"
    return scores, verdict_emoji


def generate_radar_plot(scores, verdict_emoji):
    """Build a Plotly polar (radar) chart of the criterion scores.

    The trace color encodes the verdict: green for ✅, orange for ⚠️,
    red for 🚫, and gray for anything unrecognized.
    """
    categories = list(scores.keys())
    values = list(scores.values())
    values.append(values[0])  # repeat the first value to close the polygon
    colors = {
        "✅": "green",
        "⚠️": "orange",
        "🚫": "red",
    }
    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
        r=values,
        theta=categories + [categories[0]],
        fill='toself',
        name='Prompt Score',
        line=dict(color=colors.get(verdict_emoji, "gray")),
        marker=dict(size=8),
    ))
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 5])),
        showlegend=False,
        title="🔎 Prompt Evaluation Radar",
    )
    return fig


HF_API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
HF_TOKEN = os.environ.get("HF_PROJECT_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Prompt Template
PROMPT_TEMPLATE = """
You are a prompt evaluation assistant called PromptPolice.
Evaluate the following user prompt based on the criteria below.
For each, rate from 1 (poor) to 5 (excellent), explain why, and suggest specific improvements.

Prompt:
{user_prompt}

Evaluation Criteria (1–5):
- 🔍 Clarity: Is the prompt easy to understand?
- 🧠 Context: Does the prompt provide enough background or situational framing?
- 🎯 Specificity: Are the goals or constraints clearly defined?
- 🛍️ Intent Alignment: Is it clear what task or behavior the prompt is meant to elicit?

Verdict Logic:
- ✅ **Pass**: Overall Score is 16 or above AND no criterion rated below 3.
- ⚠️ **Warning**: Score between 11–15 OR one rating below 3.
- 🚫 **Ticket**: Score 10 or below OR two or more ratings below 3.

Output Format:
Evaluation:
- 🔍 Clarity: X/5 — [Explanation]
- 🧠 Context: X/5 — [Explanation]
- 🎯 Specificity: X/5 — [Explanation]
- 🛏️ Intent Alignment: X/5 — [Explanation]

Overall Score: X/20
Flaw Summary: [One-line summary of the weakest point]
🛡️ Verdict: [✅ Pass / ⚠️ Warning / 🚫 Ticket] — [Justification based on score and flaw]
Prompt Type(s): [e.g., Instruction, Summarization, Query, Roleplay, Classification, Creative Writing, Other]

Suggestions:
- [Actionable suggestion 1]
- [Actionable suggestion 2]

Improved Prompt:
"[Rewritten version of the user prompt]"
"""


@st.cache_data(show_spinner=False)
def evaluate_prompt(user_prompt):
    """Send the filled-in evaluation template to the chat-completions API.

    Returns the model's reply text, or an "Error: ..." string on HTTP or
    network failure.

    NOTE(review): st.cache_data also memoizes error strings, so a transient
    failure is cached for the same prompt until the cache is cleared.
    """
    payload = {
        "messages": [{"role": "user",
                      "content": PROMPT_TEMPLATE.format(user_prompt=user_prompt)}],
        "model": "deepseek/deepseek-r1-turbo",
        "temperature": 0.7,
        "stream": False,
    }
    try:
        # Fix: a timeout so a stalled upstream request cannot hang the app
        # forever; network errors get the same "Error: ..." shape callers
        # already handle for HTTP failures instead of an uncaught traceback.
        response = requests.post(HF_API_URL, headers=HEADERS, json=payload,
                                 timeout=60)
    except requests.RequestException as exc:
        return f"Error: request failed - {exc}"
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    return f"Error: {response.status_code} - {response.text}"


# ----------- UI STARTS HERE -------------
st.set_page_config("PromptPolice", page_icon="🚓", layout="wide")

# Custom styling.
# NOTE(review): the CSS/<style> payload appears to have been stripped from
# this copy of the file — restore the original block if available upstream.
st.markdown("""
""", unsafe_allow_html=True)

st.title("🚓 PromptPolice")
st.caption("Evaluate your prompts like a seasoned detective.")

col1, col2 = st.columns([1, 2], gap="large")

with col1:
    user_input = st.text_area(
        "Paste your prompt here:",
        height=300,
        placeholder="e.g. Generate a short story about a robot in a post-apocalyptic world.",
    )
    if st.button(":mag_right: Evaluate Prompt"):
        if not HF_TOKEN:
            st.error("Missing Hugging Face token. Please set HF_PROJECT_TOKEN as environment variable.")
        elif user_input.strip() == "":
            st.warning("Please enter a prompt to evaluate.")
        else:
            with st.spinner("Evaluating prompt with PromptPolice..."):
                result = evaluate_prompt(user_input)
                # Persist across reruns so the right column survives widget events.
                st.session_state["result"] = result
                st.session_state["scores"], st.session_state["verdict"] = extract_scores_and_verdict(result)

# Right column — show result only if evaluated
if "result" in st.session_state:
    with col2:
        st.subheader(":bar_chart: Radar Score")
        radar_fig = generate_radar_plot(st.session_state["scores"], st.session_state["verdict"])
        st.plotly_chart(radar_fig, use_container_width=True)
        st.markdown("", unsafe_allow_html=True)
        st.subheader(":scroll: Evaluation Result")
        # NOTE(review): the HTML wrapper of this f-string was truncated in this
        # copy; render the evaluation text directly and keep the model credit
        # as a caption. Fix: the unterminated f-string is now closed so the
        # file parses.
        st.markdown(f"""{st.session_state["result"]}""", unsafe_allow_html=True)
        st.caption("DeepSeek R1 Turbo")