Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from groq import Groq | |
# Number of prompt/rating rounds in one evaluation; used as the cap on the
# round counter by generate_answers and submit_ratings.
MAX_ROUNDS = 5
def get_groq_client():
    """Build a Groq client from the GROQ_API_KEY environment variable.

    Returns:
        A ``(client, error)`` pair. On success ``error`` is ``None``; when the
        key is missing/empty or the client cannot be constructed, ``client``
        is ``None`` and ``error`` is a human-readable message.
    """
    key = os.environ.get("GROQ_API_KEY")
    if key:
        try:
            return Groq(api_key=key), None
        except Exception as exc:
            # Surface construction failures as text so the UI can show them.
            return None, f"Error creating Groq client: {exc}"
    return (
        None,
        "Error: GROQ_API_KEY is not set. Please configure it in your environment or Hugging Face Space secrets.",
    )
def call_groq_model(model_id: str, prompt: str) -> str:
    """Ask one Groq-hosted model to answer ``prompt``.

    The prompt is sent as a single user message with a 512-token completion
    limit. Failures are never raised: a client-setup problem or an API error is
    returned as an error string in place of the answer.

    Args:
        model_id: Groq model identifier to query.
        prompt: Text sent as the user message.

    Returns:
        The content of the first completion choice, or an error message.
    """
    groq_client, setup_error = get_groq_client()
    if setup_error is not None:
        return setup_error

    request = {
        "model": model_id,
        "messages": [{"role": "user", "content": prompt}],
        "max_tokens": 512,
    }
    try:
        response = groq_client.chat.completions.create(**request)
        first_choice = response.choices[0]
        return first_choice.message.content
    except Exception as exc:
        return f"Error calling Groq model {model_id}: {exc}"
# Groq model IDs compared by the app. Each one can be overridden with an
# environment variable (GROQ_MODEL_A / GROQ_MODEL_B / GROQ_MODEL_C) so a
# retired model can be swapped without editing the code; an unset or empty
# variable falls back to the ID below.
# NOTE(review): Groq retires model IDs over time -- confirm these defaults are
# still served before deploying.
MODEL_A = os.getenv("GROQ_MODEL_A") or "llama-3.1-8b-instant"
MODEL_B = os.getenv("GROQ_MODEL_B") or "llama-3.1-70b-versatile"
MODEL_C = os.getenv("GROQ_MODEL_C") or "gemma2-9b-it"
def generate_answers(prompt, round_num):
    """Query all three models with ``prompt`` for the current round.

    Args:
        prompt: Text typed by the user; blank or whitespace-only is rejected.
        round_num: Count of rounds already rated; ``None`` is treated as 0.

    Returns:
        ``(status, answer_a, answer_b, answer_c, round_num)``. The answers are
        empty strings when no request is made (all rounds already done, or no
        prompt given). The round counter is passed back unchanged.
    """
    current_round = 0 if round_num is None else round_num

    def _no_answers(message):
        # Shared shape for the early exits: status only, blank answers.
        return message, "", "", "", current_round

    if current_round >= MAX_ROUNDS:
        return _no_answers(f"You already completed {MAX_ROUNDS} rounds.")
    if not (prompt and prompt.strip()):
        return _no_answers("Enter a prompt first.")

    # Models are queried one after another, in A, B, C order.
    replies = [
        call_groq_model(model, prompt)
        for model in (MODEL_A, MODEL_B, MODEL_C)
    ]
    banner = f"Round {current_round + 1} of {MAX_ROUNDS}: Rate each model 1–5."
    return (banner, *replies, current_round)
def _final_ranking(scores, labels):
    """Build the ranking text: models ordered by average rating, best first."""
    totals = {label: sum(scores[label]) for label in labels}
    averages = {
        label: (totals[label] / len(scores[label]) if scores[label] else 0)
        for label in labels
    }
    # sorted() is stable (also with reverse=True), so tied models keep their
    # A/B/C order.
    ranked = sorted(labels, key=averages.get, reverse=True)
    lines = [f"Final ranking after {MAX_ROUNDS} rounds:"]
    lines.extend(
        f"{place}. {label}: total={totals[label]}, avg={averages[label]:.2f}"
        for place, label in enumerate(ranked, 1)
    )
    return "\n".join(lines)


def submit_ratings(r_a, r_b, r_c, scores, round_num):
    """Record one round of ratings and, after the last round, rank the models.

    Args:
        r_a, r_b, r_c: Ratings for Model A, B and C. Each must be convertible
            to an int in the range 1..5.
        scores: Dict mapping model label to the list of ratings so far. A
            non-dict value is replaced by an empty tally and missing labels
            are added. The dict is updated in place.
        round_num: Count of rounds already rated; ``None`` is treated as 0.

    Returns:
        ``(status, scores, round_num, summary)``. ``summary`` is empty until
        the final round has been rated. Once ``MAX_ROUNDS`` rounds are done,
        further submissions leave the scores and counter untouched and simply
        return the final ranking again.
    """
    labels = ("Model A", "Model B", "Model C")
    if not isinstance(scores, dict):
        scores = {}
    for label in labels:
        scores.setdefault(label, [])
    if round_num is None:
        round_num = 0

    # The evaluation is over: do not let extra clicks add ratings or push the
    # round counter past the limit.
    if round_num >= MAX_ROUNDS:
        return (
            f"You already completed {MAX_ROUNDS} rounds.",
            scores,
            round_num,
            _final_ranking(scores, labels),
        )

    # Validate everything first so a bad rating never leaves a partial update.
    ratings = []
    for label, raw in zip(labels, (r_a, r_b, r_c)):
        if raw is None:
            return f"Missing rating for {label}.", scores, round_num, ""
        try:
            value = int(raw)
        except (TypeError, ValueError, OverflowError):
            value = None
        if value is None or not 1 <= value <= 5:
            return f"Rating for {label} must be 1–5.", scores, round_num, ""
        ratings.append(value)

    for label, value in zip(labels, ratings):
        scores[label].append(value)

    next_round = round_num + 1
    if next_round < MAX_ROUNDS:
        return (
            f"Ratings saved for round {next_round}. Enter a new prompt for the next round.",
            scores,
            next_round,
            "",
        )
    return "Evaluation complete.", scores, next_round, _final_ranking(scores, labels)
# Gradio UI: one prompt box, three read-only answer panes, three rating
# sliders and a final-ranking box, wired to generate_answers / submit_ratings.
with gr.Blocks() as demo:
    gr.Markdown("# Groq AI Model Evaluator")
    # The round count comes from MAX_ROUNDS so the intro cannot drift from the
    # limit the handlers enforce.
    gr.Markdown(
        "This app compares three different Groq-hosted models (Model A, Model B, Model C). "
        f"For each of {MAX_ROUNDS} rounds, enter a prompt, see three answers, rate each 1–5, "
        "and then see the final ranking based on your scores."
    )

    # State carried between button clicks: the ratings collected so far and the
    # number of rounds already rated.
    scores_state = gr.State({"Model A": [], "Model B": [], "Model C": []})
    round_state = gr.State(0)

    prompt = gr.Textbox(label="Your prompt", lines=3, placeholder="Ask anything you like...")
    gen_btn = gr.Button("Generate answers")
    status = gr.Textbox(label="Status", interactive=False)

    # The three answers sit side by side, labelled with the model ID in use.
    with gr.Row():
        out_a = gr.Textbox(label=f"Model A ({MODEL_A})", interactive=False, lines=8)
        out_b = gr.Textbox(label=f"Model B ({MODEL_B})", interactive=False, lines=8)
        out_c = gr.Textbox(label=f"Model C ({MODEL_C})", interactive=False, lines=8)

    gr.Markdown("### Rate each model this round (1 = poor, 5 = excellent)")
    with gr.Row():
        rate_a = gr.Slider(1, 5, step=1, label="Rate Model A", value=3)
        rate_b = gr.Slider(1, 5, step=1, label="Rate Model B", value=3)
        rate_c = gr.Slider(1, 5, step=1, label="Rate Model C", value=3)

    submit_btn = gr.Button("Submit ratings")
    summary = gr.Textbox(label="Final ranking", interactive=False, lines=8)

    # Generating answers reads the round counter and writes it back unchanged;
    # only submitting ratings advances it.
    gen_btn.click(
        fn=generate_answers,
        inputs=[prompt, round_state],
        outputs=[status, out_a, out_b, out_c, round_state],
    )
    submit_btn.click(
        fn=submit_ratings,
        inputs=[rate_a, rate_b, rate_c, scores_state, round_state],
        outputs=[status, scores_state, round_state, summary],
    )

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()