File size: 6,659 Bytes
27e6993
 
 
 
 
7e69492
3690fde
7e69492
 
2b00636
522d7bd
 
 
 
7e69492
2b00636
7e69492
a7f2414
79b238f
575509d
7e69492
 
 
 
 
a7f2414
 
7e69492
 
 
 
 
 
 
 
 
 
a7f2414
7e69492
 
 
 
9f14bd4
27e6993
 
 
 
fc7991c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
980f071
27e6993
a7f2414
27e6993
 
 
a7f2414
1ab476c
a7f2414
 
27e6993
 
 
5031340
27e6993
 
 
a7f2414
 
 
 
8112fc5
 
 
 
 
 
 
 
 
 
 
27e6993
a7f2414
7e69492
a7f2414
 
8112fc5
a7f2414
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc7991c
17b6528
a7f2414
17b6528
e244694
17b6528
a7f2414
17b6528
e244694
 
a7f2414
17b6528
 
 
 
 
 
 
 
 
 
 
 
 
27e6993
 
 
8112fc5
 
 
 
a7f2414
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# app.py
import streamlit as st
import requests
import os
import re
import plotly.graph_objects as go

def extract_scores_and_verdict(result_text):
    """Parse the evaluator's free-text reply into structured results.

    Returns a tuple ``(scores, verdict_emoji)`` where ``scores`` maps each
    criterion name to an int 0-5 (0 when the criterion line is missing from
    the text) and ``verdict_emoji`` is the emoji following "Verdict:",
    defaulting to "βœ…" when none is found.
    """
    criterion_patterns = {
        "Clarity": r"[-–‒\s]*[^\w]?Clarity:\s*(\d)/5",
        "Context": r"[-–‒\s]*[^\w]?Context:\s*(\d)/5",
        "Specificity": r"[-–‒\s]*[^\w]?Specificity:\s*(\d)/5",
        "Intent Alignment": r"[-–‒\s]*[^\w]?Intent Alignment:\s*(\d)/5"
    }
    parsed = {}
    for criterion, rx in criterion_patterns.items():
        hit = re.search(rx, result_text)
        parsed[criterion] = 0 if hit is None else int(hit.group(1))
    verdict_hit = re.search(r"Verdict:\s*(βœ…|⚠️|🚫)", result_text)
    verdict = verdict_hit.group(1) if verdict_hit else "βœ…"
    return parsed, verdict

def generate_radar_plot(scores, verdict_emoji):
    """Render the criterion scores as a closed radar (spider) chart.

    The trace color reflects the verdict emoji (green / orange / red),
    falling back to gray for anything unrecognized. Returns a plotly
    Figure with a fixed 0-5 radial axis.
    """
    verdict_colors = { "βœ…": "green", "⚠️": "orange", "🚫": "red" }
    axis_labels = list(scores.keys())
    radii = list(scores.values())
    radii.append(radii[0])  # repeat the first point so the polygon closes
    radar = go.Figure()
    radar.add_trace(go.Scatterpolar(
        theta=axis_labels + [axis_labels[0]],
        r=radii,
        fill='toself',
        name='Prompt Score',
        marker=dict(size=8),
        line=dict(color=verdict_colors.get(verdict_emoji, "gray"))
    ))
    radar.update_layout(
        title="πŸ”Ž Prompt Evaluation Radar",
        polar=dict(radialaxis=dict(visible=True, range=[0, 5])),
        showlegend=False
    )
    return radar

# Hugging Face router endpoint (Novita provider, OpenAI-compatible chat API).
HF_API_URL = "https://router.huggingface.co/novita/v3/openai/chat/completions"
# Token comes from the environment; may be None if unset (the UI checks this
# before issuing any request).
HF_TOKEN = os.environ.get("HF_PROJECT_TOKEN")
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

# Prompt Template
# Single user-turn template sent to the model; {user_prompt} is filled in by
# evaluate_prompt(). The rigid "Output Format" section below is what
# extract_scores_and_verdict() parses with regexes — keep the two in sync.
PROMPT_TEMPLATE = """
You are a prompt evaluation assistant called PromptPolice. Evaluate the following user prompt based on the criteria below. For each, rate from 1 (poor) to 5 (excellent), explain why, and suggest specific improvements.

Prompt:

{user_prompt} 

Evaluation Criteria (1–5):
- πŸ” Clarity: Is the prompt easy to understand?
- 🧠 Context: Does the prompt provide enough background or situational framing?
- 🎯 Specificity: Are the goals or constraints clearly defined?
- πŸ›οΈ Intent Alignment: Is it clear what task or behavior the prompt is meant to elicit?
Verdict Logic:
- βœ… **Pass**: Overall Score is 16 or above AND no criterion rated below 3.
- ⚠️ **Warning**: Score between 11–15 OR one rating below 3.
- 🚫 **Ticket**: Score 10 or below OR two or more ratings below 3.
Output Format:
Evaluation:
- πŸ” Clarity: X/5 β€” [Explanation]
- 🧠 Context: X/5 β€” [Explanation]
- 🎯 Specificity: X/5 β€” [Explanation]
- πŸ›οΈ Intent Alignment: X/5 β€” [Explanation]
Overall Score: X/20
Flaw Summary: [One-line summary of the weakest point]
πŸ›‘οΈ Verdict: [βœ… Pass / ⚠️ Warning / 🚫 Ticket] β€” [Justification based on score and flaw]
Prompt Type(s): [e.g., Instruction, Summarization, Query, Roleplay, Classification, Creative Writing, Other]
Suggestions:
- [Actionable suggestion 1]
- [Actionable suggestion 2]
Improved Prompt:
"[Rewritten version of the user prompt]"
"""


@st.cache_data(show_spinner=False)
def evaluate_prompt(user_prompt):
    """Send the filled-in evaluation template to the chat-completions API.

    Returns the model's reply text on success, or a human-readable
    "Error: ..." string on HTTP or network failure, so the caller can always
    render the result without a try/except. Cached by Streamlit per unique
    prompt, so repeated evaluations of the same text skip the API call.
    """
    payload = {
        "messages": [{"role": "user", "content": PROMPT_TEMPLATE.format(user_prompt=user_prompt)}],
        "model": "deepseek/deepseek-r1-turbo",
        "temperature": 0.7,
        "stream": False
    }
    try:
        # A timeout keeps the Streamlit worker from hanging forever if the
        # router stalls (requests' default is no timeout at all); 60s leaves
        # headroom for slow reasoning-model generations.
        response = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=60)
    except requests.exceptions.RequestException as exc:
        # Network-level failures (DNS, refused connection, timeout) follow the
        # same error-string convention as HTTP failures below.
        return f"Error: request failed - {exc}"
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    return f"Error: {response.status_code} - {response.text}"

# ----------- UI STARTS HERE -------------
st.set_page_config("PromptPolice", page_icon="πŸš“", layout="wide")

# Custom styling: dark-theme overrides for the main container, text inputs,
# and both button variants.
st.markdown("""
    <style>
        .main {background-color: #0f1117; color: #f0f0f0; font-family: 'Segoe UI', sans-serif;}
        textarea, input, .stTextArea textarea {background-color: #1e1e1e; color: #f0f0f0; border-radius: 10px;}
        .stButton>button {background-color: #2c2f3a; color: white; border-radius: 10px;}
        .stDownloadButton>button {background-color: #40434f; color: white; border-radius: 10px;}
    </style>
""", unsafe_allow_html=True)

st.title("πŸš“ PromptPolice")
st.caption("Evaluate your prompts like a seasoned detective.")

# Narrow input column on the left, wider results column on the right.
col1, col2 = st.columns([1, 2], gap="large")

with col1:
    user_input = st.text_area("Paste your prompt here:", height=300, placeholder="e.g. Generate a short story about a robot in a post-apocalyptic world.")

    if st.button(":mag_right: Evaluate Prompt"):
        # Guard rails before hitting the API: a token must be configured and
        # the prompt must be non-blank.
        if not HF_TOKEN:
            st.error("Missing Hugging Face token. Please set HF_PROJECT_TOKEN as environment variable.")
        elif user_input.strip() == "":
            st.warning("Please enter a prompt to evaluate.")
        else:
            with st.spinner("Evaluating prompt with PromptPolice..."):
                result = evaluate_prompt(user_input)
            # Stash everything in session_state so the results survive the
            # rerun Streamlit triggers after every interaction.
            st.session_state["result"] = result
            st.session_state["scores"], st.session_state["verdict"] = extract_scores_and_verdict(result)

# Right column β€” show result only if evaluated
if "result" in st.session_state:
    with col2:
        st.subheader(":bar_chart: Radar Score")
        radar_fig = generate_radar_plot(st.session_state["scores"], st.session_state["verdict"])
        st.plotly_chart(radar_fig, use_container_width=True)

        # Invisible anchor targeted by the scroll script at the bottom.
        st.markdown("<a name='result'></a>", unsafe_allow_html=True)
        st.subheader(":scroll: Evaluation Result")

        # Raw model output rendered in a dark monospace panel; <pre> keeps the
        # model's own line breaks intact.
        st.markdown(f"""
            <div style='background-color:#1e1e1e; color:#f0f0f0; padding:20px; border-radius:10px; border: 1px solid #444; font-family: monospace; font-size: 15px;'>
                <pre style='white-space:pre-wrap; color:#f0f0f0;'>{st.session_state["result"]}</pre>
            </div>
        """, unsafe_allow_html=True)

        st.download_button("Download Evaluation", st.session_state["result"], file_name="evaluation.txt")
        st.toast("βœ… Evaluation complete!", icon="πŸ€–")
        st.balloons()
        # NOTE(review): Streamlit sanitizes st.markdown output and does not
        # execute <script> tags even with unsafe_allow_html=True, so this
        # smooth-scroll snippet likely never runs — verify in the browser.
        st.markdown("""
            <script>
                const resultAnchor = document.querySelector("a[name='result']");
                if (resultAnchor) {
                    setTimeout(() => {
                        resultAnchor.scrollIntoView({ behavior: 'smooth' });
                    }, 100);
                }
            </script>
        """, unsafe_allow_html=True)

# Footer: rendered on every run, whether or not an evaluation has happened.
st.markdown("""
---
<center>
    Made with ❀️ by <b>Penguins</b> · Powered by <code>DeepSeek R1 Turbo</code><br>
    No data stored Β· No nonsense Β· Just prompt justice βš–οΈ
</center>
""", unsafe_allow_html=True)