# ai-rating-app / app.py
# Uploaded by aartstudio ("Upload 2 files", commit f21ed09, verified)
# NOTE: the lines above were Hugging Face Space page artifacts, not code;
# they are preserved here as comments so the file remains valid Python.
import os
import gradio as gr
from groq import Groq
MAX_ROUNDS = 5
def get_groq_client():
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
return None, "Error: GROQ_API_KEY is not set. Please configure it in your environment or Hugging Face Space secrets."
try:
client = Groq(api_key=api_key)
return client, None
except Exception as e:
return None, f"Error creating Groq client: {e}"
def call_groq_model(model_id: str, prompt: str) -> str:
client, err = get_groq_client()
if err is not None:
return err
try:
completion = client.chat.completions.create(
model=model_id,
messages=[{"role": "user", "content": prompt}],
max_tokens=512,
)
return completion.choices[0].message.content
except Exception as e:
return f"Error calling Groq model {model_id}: {e}"
# Updated model IDs (currently supported on Groq)
MODEL_A = "llama-3.1-8b-instant"
MODEL_B = "llama-3.1-70b-versatile"
MODEL_C = "gemma2-9b-it"
def generate_answers(prompt, round_num):
if round_num is None:
round_num = 0
if round_num >= MAX_ROUNDS:
return f"You already completed {MAX_ROUNDS} rounds.", "", "", "", round_num
if not prompt or not prompt.strip():
return "Enter a prompt first.", "", "", "", round_num
ans_a = call_groq_model(MODEL_A, prompt)
ans_b = call_groq_model(MODEL_B, prompt)
ans_c = call_groq_model(MODEL_C, prompt)
status = f"Round {round_num + 1} of {MAX_ROUNDS}: Rate each model 1–5."
return status, ans_a, ans_b, ans_c, round_num
def submit_ratings(r_a, r_b, r_c, scores, round_num):
if scores is None or not isinstance(scores, dict):
scores = {"Model A": [], "Model B": [], "Model C": []}
if round_num is None:
round_num = 0
for label, r in [("Model A", r_a), ("Model B", r_b), ("Model C", r_c)]:
if r is None:
return f"Missing rating for {label}.", scores, round_num, ""
if not (1 <= int(r) <= 5):
return f"Rating for {label} must be 1–5.", scores, round_num, ""
scores["Model A"].append(int(r_a))
scores["Model B"].append(int(r_b))
scores["Model C"].append(int(r_c))
next_round = round_num + 1
if next_round < MAX_ROUNDS:
return (
f"Ratings saved for round {next_round}. Enter a new prompt for the next round.",
scores,
next_round,
"",
)
def agg(name):
arr = scores[name]
total = sum(arr)
avg = total / len(arr) if arr else 0
return total, avg
summary_lines = ["Final ranking after 5 rounds:"]
models = ["Model A", "Model B", "Model C"]
results = {m: agg(m) for m in models}
ranking = sorted(models, key=lambda m: results[m][1], reverse=True)
for i, m in enumerate(ranking, 1):
total, avg = results[m]
summary_lines.append(f"{i}. {m}: total={total}, avg={avg:.2f}")
return "Evaluation complete.", scores, next_round, "\n".join(summary_lines)
with gr.Blocks() as demo:
gr.Markdown("# Groq AI Model Evaluator")
gr.Markdown(
"This app compares three different Groq-hosted models (Model A, Model B, Model C). "
"For each of 5 rounds, enter a prompt, see three answers, rate each 1–5, "
"and then see the final ranking based on your scores."
)
scores_state = gr.State({"Model A": [], "Model B": [], "Model C": []})
round_state = gr.State(0)
prompt = gr.Textbox(label="Your prompt", lines=3, placeholder="Ask anything you like...")
gen_btn = gr.Button("Generate answers")
status = gr.Textbox(label="Status", interactive=False)
with gr.Row():
out_a = gr.Textbox(label=f"Model A ({MODEL_A})", interactive=False, lines=8)
out_b = gr.Textbox(label=f"Model B ({MODEL_B})", interactive=False, lines=8)
out_c = gr.Textbox(label=f"Model C ({MODEL_C})", interactive=False, lines=8)
gr.Markdown("### Rate each model this round (1 = poor, 5 = excellent)")
with gr.Row():
rate_a = gr.Slider(1, 5, step=1, label="Rate Model A", value=3)
rate_b = gr.Slider(1, 5, step=1, label="Rate Model B", value=3)
rate_c = gr.Slider(1, 5, step=1, label="Rate Model C", value=3)
submit_btn = gr.Button("Submit ratings")
summary = gr.Textbox(label="Final ranking", interactive=False, lines=8)
gen_btn.click(
fn=generate_answers,
inputs=[prompt, round_state],
outputs=[status, out_a, out_b, out_c, round_state],
)
submit_btn.click(
fn=submit_ratings,
inputs=[rate_a, rate_b, rate_c, scores_state, round_state],
outputs=[status, scores_state, round_state, summary],
)
if __name__ == "__main__":
demo.launch()