Spaces:

aartstudio
/

ai-rating-app

Sleeping

App Files Community

aartstudio committed on Dec 20, 2025

Commit

533c8c8

verified ·

1 Parent(s): 8f457ac

Upload 3 files

Browse files

Files changed (3) hide show

README.md +21 -37
app.py +65 -75
requirements.txt +1 -4

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-title: AI Model Evaluator
 emoji: 🧪
 colorFrom: indigo
 colorTo: purple
@@ -9,57 +9,41 @@ app_file: app.py
 pinned: false
 ---
-# AI Model Evaluator – Gradio App for Hugging Face Spaces
-This app lets a user **compare three AI models** – ChatGPT, DeepSeek, and Gemini – by:
 1. Entering a prompt (question or instruction).
-2. Viewing the responses from all three models.
 3. Rating each response from **1 to 5**.
 4. Repeating this process for **5 rounds**.
 5. At the end, the app **aggregates the scores** and shows a **final ranking** of the models based on the user's ratings.
-## How it works
-- Each round:
-  - You type a prompt and click **"Generate answers"**.
-  - The app calls:
-    - OpenAI ChatGPT (e.g. `gpt-4o-mini`)
-    - DeepSeek (`deepseek-chat`)
-    - Google Gemini (`gemini-1.5-flash`)
-  - It displays the three answers side by side.
-  - You rate each model from **1 to 5** using sliders.
-  - Click **"Submit ratings"** to save that round's scores.
-- After **5 rounds**:
-  - The app calculates:
-    - Total score per model
-    - Average score per model
-  - It displays a **ranked list** from highest to lowest average score.
-## API keys and environment variables
-This Space expects the following **repository secrets** to be set:
-- `aatestkey` – OpenAI API key (ChatGPT)
-- `aadeepseekkey` – DeepSeek API key
-- `aageminikey` – Google Gemini API key
-You can add them in:
-**Settings → Repository secrets → Add secret**
-Locally, you can export them like this:
 ```bash
-export aatestkey="your_openai_api_key"
-export aadeepseekkey="your_deepseek_api_key"
-export aageminikey="your_gemini_api_key"
 python app.py
 ```
-## Files
-- `app.py` – main Gradio app
-- `requirements.txt` – Python dependencies
-- `README.md` – this documentation and Space configuration

 ---
+title: Groq AI Model Evaluator
 emoji: 🧪
 colorFrom: indigo
 colorTo: purple
 pinned: false
 ---
+# Groq AI Model Evaluator – Gradio App for Hugging Face Spaces
+This app lets a user **compare three Groq-hosted AI models** by:
 1. Entering a prompt (question or instruction).
+2. Viewing the responses from three different Groq models:
+   - Model A: `llama3-8b-8192`
+   - Model B: `llama3-70b-8192`
+   - Model C: `gemma-7b-it`
 3. Rating each response from **1 to 5**.
 4. Repeating this process for **5 rounds**.
 5. At the end, the app **aggregates the scores** and shows a **final ranking** of the models based on the user's ratings.
+## API key
+This Space uses the **Groq Python SDK** and expects a single environment variable:
+- `GROQ_API_KEY` – your Groq API key
+In your Hugging Face Space:
+1. Go to **Settings → Repository secrets**.
+2. Add a secret named **`GROQ_API_KEY`** with your key value.
+3. Save. The app will read it via `os.getenv("GROQ_API_KEY")`.
+## Files
+- `app.py` – main Gradio app
+- `requirements.txt` – Python dependencies
+- `README.md` – this documentation and Space configuration
+## Run locally
 ```bash
+pip install -r requirements.txt
+export GROQ_API_KEY="your_groq_api_key"
 python app.py
 ```

app.py CHANGED Viewed

@@ -1,137 +1,127 @@
 import os
-import requests
 import gradio as gr
 MAX_ROUNDS = 5
-def call_chatgpt(prompt: str) -> str:
-    from openai import OpenAI
-    api_key = os.getenv("aatestkey")
     if not api_key:
-        return "Error: aatestkey is not set."
     try:
-        client = OpenAI(api_key=api_key)
-        response = client.chat.completions.create(
-            model="gpt-4o-mini",
-            messages=[{"role": "user", "content": prompt}],
-            max_tokens=512,
-        )
-        return response.choices[0].message.content
     except Exception as e:
-        return f"Error calling ChatGPT: {e}"
-def call_gemini(prompt: str) -> str:
-    import google.generativeai as genai
-    api_key = os.getenv("aageminikey")
-    if not api_key:
-        return "Error: aageminikey is not set."
     try:
-        genai.configure(api_key=api_key)
-        model = genai.GenerativeModel("gemini-1.5-flash")
-        response = model.generate_content(prompt)
-        return getattr(response, "text", str(response))
     except Exception as e:
-        return f"Error calling Gemini: {e}"
-def call_deepseek(prompt: str) -> str:
-    api_key = os.getenv("aadeepseekkey")
-    if not api_key:
-        return "Error: aadeepseekkey is not set."
-    url = "https://api.deepseek.com/chat/completions"
-    headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
-    data = {
-        "model": "deepseek-chat",
-        "messages": [{"role": "user", "content": prompt}],
-        "max_tokens": 512,
-    }
-    try:
-        resp = requests.post(url, headers=headers, json=data, timeout=60)
-        resp.raise_for_status()
-        out = resp.json()
-        return out["choices"][0]["message"]["content"]
-    except Exception as e:
-        return f"Error calling DeepSeek: {e}"
 def generate_answers(prompt, round_num):
     if round_num is None:
         round_num = 0
     if round_num >= MAX_ROUNDS:
         return f"You already completed {MAX_ROUNDS} rounds.", "", "", "", round_num
-    if not prompt.strip():
         return "Enter a prompt first.", "", "", "", round_num
-    return (
-        f"Round {round_num + 1} of {MAX_ROUNDS}: Rate each model 1–5.",
-        call_chatgpt(prompt),
-        call_deepseek(prompt),
-        call_gemini(prompt),
-        round_num,
-    )
-def submit_ratings(cr, dr, gr_, scores, round_num):
     if scores is None or not isinstance(scores, dict):
-        scores = {"ChatGPT": [], "DeepSeek": [], "Gemini": []}
     if round_num is None:
         round_num = 0
-    for label, r in [("ChatGPT", cr), ("DeepSeek", dr), ("Gemini", gr_)]:
         if r is None:
             return f"Missing rating for {label}.", scores, round_num, ""
         if not (1 <= int(r) <= 5):
             return f"Rating for {label} must be 1–5.", scores, round_num, ""
-    scores["ChatGPT"].append(int(cr))
-    scores["DeepSeek"].append(int(dr))
-    scores["Gemini"].append(int(gr_))
     next_round = round_num + 1
     if next_round < MAX_ROUNDS:
         return (
-            f"Ratings saved for round {next_round}.",
             scores,
             next_round,
             "",
         )
-    def agg(model):
-        arr = scores[model]
         total = sum(arr)
         avg = total / len(arr)
         return total, avg
-    summary = []
-    models = ["ChatGPT", "DeepSeek", "Gemini"]
     results = {m: agg(m) for m in models}
     ranking = sorted(models, key=lambda m: results[m][1], reverse=True)
-    summary.append("Final ranking after 5 rounds:")
     for i, m in enumerate(ranking, 1):
         total, avg = results[m]
-        summary.append(f"{i}. {m}: total={total}, avg={avg:.2f}")
-    return "Evaluation complete.", scores, next_round, "\n".join(summary)
 with gr.Blocks() as demo:
-    gr.Markdown("# AI Model Evaluator")
-    scores = gr.State({"ChatGPT": [], "DeepSeek": [], "Gemini": []})
     round_state = gr.State(0)
-    prompt = gr.Textbox(label="Your prompt", lines=3)
     gen_btn = gr.Button("Generate answers")
     status = gr.Textbox(label="Status", interactive=False)
     with gr.Row():
-        out_c = gr.Textbox(label="ChatGPT", interactive=False, lines=8)
-        out_d = gr.Textbox(label="DeepSeek", interactive=False, lines=8)
-        out_g = gr.Textbox(label="Gemini", interactive=False, lines=8)
     with gr.Row():
-        rate_c = gr.Slider(1, 5, step=1, label="Rate ChatGPT")
-        rate_d = gr.Slider(1, 5, step=1, label="Rate DeepSeek")
-        rate_g = gr.Slider(1, 5, step=1, label="Rate Gemini")
     submit_btn = gr.Button("Submit ratings")
     summary = gr.Textbox(label="Final ranking", interactive=False, lines=8)
@@ -139,13 +129,13 @@ with gr.Blocks() as demo:
     gen_btn.click(
         fn=generate_answers,
         inputs=[prompt, round_state],
-        outputs=[status, out_c, out_d, out_g, round_state],
     )
     submit_btn.click(
         fn=submit_ratings,
-        inputs=[rate_c, rate_d, rate_g, scores, round_state],
-        outputs=[status, scores, round_state, summary],
     )
 if __name__ == "__main__":

 import os
 import gradio as gr
+from groq import Groq
 MAX_ROUNDS = 5
+# Build a Groq client from GROQ_API_KEY; returns (client, None) on success or (None, error message) on failure.
+def get_groq_client():
+    api_key = os.getenv("GROQ_API_KEY")
     if not api_key:
+        return None, "Error: GROQ_API_KEY is not set. Please configure it in your environment or Hugging Face Space secrets."
     try:
+        client = Groq(api_key=api_key)
+        return client, None
     except Exception as e:
+        return None, f"Error creating Groq client: {e}"
+def call_groq_model(model_id: str, prompt: str) -> str:
+    client, err = get_groq_client()
+    if err is not None:
+        return err
     try:
+        completion = client.chat.completions.create(
+            model=model_id,
+            messages=[{"role": "user", "content": prompt}],
+            max_tokens=512,
+        )
+        return completion.choices[0].message.content
     except Exception as e:
+        return f"Error calling Groq model {model_id}: {e}"
+# Model IDs hosted on Groq (you can change these as you like; confirm each ID against Groq's current model list before deploying)
+MODEL_A = "llama3-8b-8192"
+MODEL_B = "llama3-70b-8192"
+MODEL_C = "gemma-7b-it"
 def generate_answers(prompt, round_num):
     if round_num is None:
         round_num = 0
     if round_num >= MAX_ROUNDS:
         return f"You already completed {MAX_ROUNDS} rounds.", "", "", "", round_num
+    if not prompt or not prompt.strip():
         return "Enter a prompt first.", "", "", "", round_num
+    ans_a = call_groq_model(MODEL_A, prompt)
+    ans_b = call_groq_model(MODEL_B, prompt)
+    ans_c = call_groq_model(MODEL_C, prompt)
+    status = f"Round {round_num + 1} of {MAX_ROUNDS}: Rate each model 1–5."
+    return status, ans_a, ans_b, ans_c, round_num
+def submit_ratings(r_a, r_b, r_c, scores, round_num):
     if scores is None or not isinstance(scores, dict):
+        scores = {"Model A": [], "Model B": [], "Model C": []}
     if round_num is None:
         round_num = 0
+    for label, r in [("Model A", r_a), ("Model B", r_b), ("Model C", r_c)]:
         if r is None:
             return f"Missing rating for {label}.", scores, round_num, ""
         if not (1 <= int(r) <= 5):
             return f"Rating for {label} must be 1–5.", scores, round_num, ""
+    scores["Model A"].append(int(r_a))
+    scores["Model B"].append(int(r_b))
+    scores["Model C"].append(int(r_c))
     next_round = round_num + 1
     if next_round < MAX_ROUNDS:
         return (
+            f"Ratings saved for round {next_round}. Enter a new prompt for the next round.",
             scores,
             next_round,
             "",
         )
+    def agg(name):
+        arr = scores[name]
         total = sum(arr)
         avg = total / len(arr)
         return total, avg
+    summary_lines = []
+    summary_lines.append("Final ranking after 5 rounds:")
+    models = ["Model A", "Model B", "Model C"]
     results = {m: agg(m) for m in models}
     ranking = sorted(models, key=lambda m: results[m][1], reverse=True)
     for i, m in enumerate(ranking, 1):
         total, avg = results[m]
+        summary_lines.append(f"{i}. {m}: total={total}, avg={avg:.2f}")
+    return "Evaluation complete.", scores, next_round, "\n".join(summary_lines)
 with gr.Blocks() as demo:
+    gr.Markdown("# Groq AI Model Evaluator")
+    gr.Markdown(
+        "This app compares three different Groq-hosted models (Model A, Model B, Model C). "
+        "For each of 5 rounds, enter a prompt, see three answers, rate each 1–5, "
+        "and then see the final ranking based on your scores."
+    )
+    scores_state = gr.State({"Model A": [], "Model B": [], "Model C": []})
     round_state = gr.State(0)
+    prompt = gr.Textbox(label="Your prompt", lines=3, placeholder="Ask anything you like...")
     gen_btn = gr.Button("Generate answers")
     status = gr.Textbox(label="Status", interactive=False)
     with gr.Row():
+        out_a = gr.Textbox(label=f"Model A ({MODEL_A})", interactive=False, lines=8)
+        out_b = gr.Textbox(label=f"Model B ({MODEL_B})", interactive=False, lines=8)
+        out_c = gr.Textbox(label=f"Model C ({MODEL_C})", interactive=False, lines=8)
+    gr.Markdown("### Rate each model this round (1 = poor, 5 = excellent)")
     with gr.Row():
+        rate_a = gr.Slider(1, 5, step=1, label="Rate Model A", value=3)
+        rate_b = gr.Slider(1, 5, step=1, label="Rate Model B", value=3)
+        rate_c = gr.Slider(1, 5, step=1, label="Rate Model C", value=3)
     submit_btn = gr.Button("Submit ratings")
     summary = gr.Textbox(label="Final ranking", interactive=False, lines=8)
     gen_btn.click(
         fn=generate_answers,
         inputs=[prompt, round_state],
+        outputs=[status, out_a, out_b, out_c, round_state],
     )
     submit_btn.click(
         fn=submit_ratings,
+        inputs=[rate_a, rate_b, rate_c, scores_state, round_state],
+        outputs=[status, scores_state, round_state, summary],
     )
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -1,5 +1,2 @@
 gradio>=4.0.0
-openai>=1.6.0
-google-generativeai>=0.8.0
-requests>=2.31.0



1	gradio>=4.0.0
2	+ groq>=0.9.0