# ai-rating-app / app.py
# Uploaded by aartstudio ("Upload 2 files", commit f21ed09, verified)
# NOTE: the lines above were Hugging Face Space page artifacts, not code;
# they are preserved here as comments so the file remains valid Python.
import os
import gradio as gr
from groq import Groq
MAX_ROUNDS = 5
def get_groq_client():
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
return None, "Error: GROQ_API_KEY is not set. Please configure it in your environment or Hugging Face Space secrets."
try:
client = Groq(api_key=api_key)
return client, None
except Exception as e:
return None, f"Error creating Groq client: {e}"
def call_groq_model(model_id: str, prompt: str) -> str:
client, err = get_groq_client()
if err is not None:
return err
try:
completion = client.chat.completions.create(
model=model_id,
messages=[{"role": "user", "content": prompt}],
max_tokens=512,
)
return completion.choices[0].message.content
except Exception as e:
return f"Error calling Groq model {model_id}: {e}"
# Updated model IDs (currently supported on Groq)
MODEL_A = "llama-3.1-8b-instant"
MODEL_B = "llama-3.1-70b-versatile"
MODEL_C = "gemma2-9b-it"
def generate_answers(prompt, round_num):
if round_num is None:
round_num = 0
if round_num >= MAX_ROUNDS:
return f"You already completed {MAX_ROUNDS} rounds.", "", "", "", round_num
if not prompt or not prompt.strip():
return "Enter a prompt first.", "", "", "", round_num
ans_a = call_groq_model(MODEL_A, prompt)
ans_b = call_groq_model(MODEL_B, prompt)
ans_c = call_groq_model(MODEL_C, prompt)
status = f"Round {round_num + 1} of {MAX_ROUNDS}: Rate each model 1–5."
return status, ans_a, ans_b, ans_c, round_num
def submit_ratings(r_a, r_b, r_c, scores, round_num):
if scores is None or not isinstance(scores, dict):
scores = {"Model A": [], "Model B": [], "Model C": []}
if round_num is None:
round_num = 0
for label, r in [("Model A", r_a), ("Model B", r_b), ("Model C", r_c)]:
if r is None:
return f"Missing rating for {label}.", scores, round_num, ""
if not (1 <= int(r) <= 5):
return f"Rating for {label} must be 1–5.", scores, round_num, ""
scores["Model A"].append(int(r_a))
scores["Model B"].append(int(r_b))
scores["Model C"].append(int(r_c))
next_round = round_num + 1
if next_round < MAX_ROUNDS:
return (
f"Ratings saved for round {next_round}. Enter a new prompt for the next round.",
scores,
next_round,
"",
)
def agg(name):
arr = scores[name]
total = sum(arr)
avg = total / len(arr) if arr else 0
return total, avg
summary_lines = ["Final ranking after 5 rounds:"]
models = ["Model A", "Model B", "Model C"]
results = {m: agg(m) for m in models}
ranking = sorted(models, key=lambda m: results[m][1], reverse=True)
for i, m in enumerate(ranking, 1):
total, avg = results[m]
summary_lines.append(f"{i}. {m}: total={total}, avg={avg:.2f}")
return "Evaluation complete.", scores, next_round, "\n".join(summary_lines)
with gr.Blocks() as demo:
gr.Markdown("# Groq AI Model Evaluator")
gr.Markdown(
"This app compares three different Groq-hosted models (Model A, Model B, Model C). "
"For each of 5 rounds, enter a prompt, see three answers, rate each 1–5, "
"and then see the final ranking based on your scores."
)
scores_state = gr.State({"Model A": [], "Model B": [], "Model C": []})
round_state = gr.State(0)
prompt = gr.Textbox(label="Your prompt", lines=3, placeholder="Ask anything you like...")
gen_btn = gr.Button("Generate answers")
status = gr.Textbox(label="Status", interactive=False)
with gr.Row():
out_a = gr.Textbox(label=f"Model A ({MODEL_A})", interactive=False, lines=8)
out_b = gr.Textbox(label=f"Model B ({MODEL_B})", interactive=False, lines=8)
out_c = gr.Textbox(label=f"Model C ({MODEL_C})", interactive=False, lines=8)
gr.Markdown("### Rate each model this round (1 = poor, 5 = excellent)")
with gr.Row():
rate_a = gr.Slider(1, 5, step=1, label="Rate Model A", value=3)
rate_b = gr.Slider(1, 5, step=1, label="Rate Model B", value=3)
rate_c = gr.Slider(1, 5, step=1, label="Rate Model C", value=3)
submit_btn = gr.Button("Submit ratings")
summary = gr.Textbox(label="Final ranking", interactive=False, lines=8)
gen_btn.click(
fn=generate_answers,
inputs=[prompt, round_state],
outputs=[status, out_a, out_b, out_c, round_state],
)
submit_btn.click(
fn=submit_ratings,
inputs=[rate_a, rate_b, rate_c, scores_state, round_state],
outputs=[status, scores_state, round_state, summary],
)
if __name__ == "__main__":
demo.launch()