aartstudio committed on
Commit
533c8c8
·
verified ·
1 Parent(s): 8f457ac

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +21 -37
  2. app.py +65 -75
  3. requirements.txt +1 -4
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: AI Model Evaluator
3
  emoji: 🧪
4
  colorFrom: indigo
5
  colorTo: purple
@@ -9,57 +9,41 @@ app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- # AI Model Evaluator – Gradio App for Hugging Face Spaces
13
 
14
- This app lets a user **compare three AI models** – ChatGPT, DeepSeek, and Gemini – by:
15
 
16
  1. Entering a prompt (question or instruction).
17
- 2. Viewing the responses from all three models.
 
 
 
18
  3. Rating each response from **1 to 5**.
19
  4. Repeating this process for **5 rounds**.
20
  5. At the end, the app **aggregates the scores** and shows a **final ranking** of the models based on the user's ratings.
21
 
22
- ## How it works
23
 
24
- - Each round:
25
- - You type a prompt and click **"Generate answers"**.
26
- - The app calls:
27
- - OpenAI ChatGPT (e.g. `gpt-4o-mini`)
28
- - DeepSeek (`deepseek-chat`)
29
- - Google Gemini (`gemini-1.5-flash`)
30
- - It displays the three answers side by side.
31
- - You rate each model from **1 to 5** using sliders.
32
- - Click **"Submit ratings"** to save that round's scores.
33
 
34
- - After **5 rounds**:
35
- - The app calculates:
36
- - Total score per model
37
- - Average score per model
38
- - It displays a **ranked list** from highest to lowest average score.
39
 
40
- ## API keys and environment variables
41
 
42
- This Space expects the following **repository secrets** to be set:
 
 
43
 
44
- - `aatestkey` – OpenAI API key (ChatGPT)
45
- - `aadeepseekkey` – DeepSeek API key
46
- - `aageminikey` – Google Gemini API key
47
-
48
- You can add them in:
49
 
50
- **Settings → Repository secrets → Add secret**
 
 
51
 
52
- Locally, you can export them like this:
53
 
54
  ```bash
55
- export aatestkey="your_openai_api_key"
56
- export aadeepseekkey="your_deepseek_api_key"
57
- export aageminikey="your_gemini_api_key"
58
  python app.py
59
  ```
60
-
61
- ## Files
62
-
63
- - `app.py` – main Gradio app
64
- - `requirements.txt` – Python dependencies
65
- - `README.md` – this documentation and Space configuration
 
1
  ---
2
+ title: Groq AI Model Evaluator
3
  emoji: 🧪
4
  colorFrom: indigo
5
  colorTo: purple
 
9
  pinned: false
10
  ---
11
 
12
+ # Groq AI Model Evaluator – Gradio App for Hugging Face Spaces
13
 
14
+ This app lets a user **compare three Groq-hosted AI models** by:
15
 
16
  1. Entering a prompt (question or instruction).
17
+ 2. Viewing the responses from three different Groq models:
18
+ - Model A: `llama3-8b-8192`
19
+ - Model B: `llama3-70b-8192`
20
+ - Model C: `gemma-7b-it`
21
  3. Rating each response from **1 to 5**.
22
  4. Repeating this process for **5 rounds**.
23
  5. At the end, the app **aggregates the scores** and shows a **final ranking** of the models based on the user's ratings.
24
 
25
+ ## API key
26
 
27
+ This Space uses the **Groq Python SDK** and expects a single environment variable:
 
 
 
 
 
 
 
 
28
 
29
+ - `GROQ_API_KEY` – your Groq API key
 
 
 
 
30
 
31
+ In your Hugging Face Space:
32
 
33
+ 1. Go to **Settings → Repository secrets**.
34
+ 2. Add a secret named **`GROQ_API_KEY`** with your key value.
35
+ 3. Save. The app will read it via `os.getenv("GROQ_API_KEY")`.
36
 
37
+ ## Files
 
 
 
 
38
 
39
+ - `app.py` – main Gradio app
40
+ - `requirements.txt` – Python dependencies
41
+ - `README.md` – this documentation and Space configuration
42
 
43
+ ## Run locally
44
 
45
  ```bash
46
+ pip install -r requirements.txt
47
+ export GROQ_API_KEY="your_groq_api_key"
 
48
  python app.py
49
  ```
 
 
 
 
 
 
app.py CHANGED
@@ -1,137 +1,127 @@
1
 
2
  import os
3
- import requests
4
  import gradio as gr
 
5
 
6
  MAX_ROUNDS = 5
7
 
8
- def call_chatgpt(prompt: str) -> str:
9
- from openai import OpenAI
10
- api_key = os.getenv("aatestkey")
11
  if not api_key:
12
- return "Error: aatestkey is not set."
13
  try:
14
- client = OpenAI(api_key=api_key)
15
- response = client.chat.completions.create(
16
- model="gpt-4o-mini",
17
- messages=[{"role": "user", "content": prompt}],
18
- max_tokens=512,
19
- )
20
- return response.choices[0].message.content
21
  except Exception as e:
22
- return f"Error calling ChatGPT: {e}"
23
 
24
- def call_gemini(prompt: str) -> str:
25
- import google.generativeai as genai
26
- api_key = os.getenv("aageminikey")
27
- if not api_key:
28
- return "Error: aageminikey is not set."
29
  try:
30
- genai.configure(api_key=api_key)
31
- model = genai.GenerativeModel("gemini-1.5-flash")
32
- response = model.generate_content(prompt)
33
- return getattr(response, "text", str(response))
 
 
34
  except Exception as e:
35
- return f"Error calling Gemini: {e}"
36
 
37
- def call_deepseek(prompt: str) -> str:
38
- api_key = os.getenv("aadeepseekkey")
39
- if not api_key:
40
- return "Error: aadeepseekkey is not set."
41
- url = "https://api.deepseek.com/chat/completions"
42
- headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
43
- data = {
44
- "model": "deepseek-chat",
45
- "messages": [{"role": "user", "content": prompt}],
46
- "max_tokens": 512,
47
- }
48
- try:
49
- resp = requests.post(url, headers=headers, json=data, timeout=60)
50
- resp.raise_for_status()
51
- out = resp.json()
52
- return out["choices"][0]["message"]["content"]
53
- except Exception as e:
54
- return f"Error calling DeepSeek: {e}"
55
 
56
  def generate_answers(prompt, round_num):
57
  if round_num is None:
58
  round_num = 0
 
59
  if round_num >= MAX_ROUNDS:
60
  return f"You already completed {MAX_ROUNDS} rounds.", "", "", "", round_num
61
- if not prompt.strip():
 
62
  return "Enter a prompt first.", "", "", "", round_num
63
 
64
- return (
65
- f"Round {round_num + 1} of {MAX_ROUNDS}: Rate each model 1–5.",
66
- call_chatgpt(prompt),
67
- call_deepseek(prompt),
68
- call_gemini(prompt),
69
- round_num,
70
- )
71
 
72
- def submit_ratings(cr, dr, gr_, scores, round_num):
73
  if scores is None or not isinstance(scores, dict):
74
- scores = {"ChatGPT": [], "DeepSeek": [], "Gemini": []}
75
 
76
  if round_num is None:
77
  round_num = 0
78
 
79
- for label, r in [("ChatGPT", cr), ("DeepSeek", dr), ("Gemini", gr_)]:
80
  if r is None:
81
  return f"Missing rating for {label}.", scores, round_num, ""
82
  if not (1 <= int(r) <= 5):
83
  return f"Rating for {label} must be 1–5.", scores, round_num, ""
84
 
85
- scores["ChatGPT"].append(int(cr))
86
- scores["DeepSeek"].append(int(dr))
87
- scores["Gemini"].append(int(gr_))
88
 
89
  next_round = round_num + 1
90
  if next_round < MAX_ROUNDS:
91
  return (
92
- f"Ratings saved for round {next_round}.",
93
  scores,
94
  next_round,
95
  "",
96
  )
97
 
98
- def agg(model):
99
- arr = scores[model]
100
  total = sum(arr)
101
  avg = total / len(arr)
102
  return total, avg
103
 
104
- summary = []
105
- models = ["ChatGPT", "DeepSeek", "Gemini"]
 
 
106
  results = {m: agg(m) for m in models}
107
  ranking = sorted(models, key=lambda m: results[m][1], reverse=True)
108
 
109
- summary.append("Final ranking after 5 rounds:")
110
  for i, m in enumerate(ranking, 1):
111
  total, avg = results[m]
112
- summary.append(f"{i}. {m}: total={total}, avg={avg:.2f}")
113
 
114
- return "Evaluation complete.", scores, next_round, "\n".join(summary)
115
 
116
  with gr.Blocks() as demo:
117
- gr.Markdown("# AI Model Evaluator")
 
 
 
 
 
118
 
119
- scores = gr.State({"ChatGPT": [], "DeepSeek": [], "Gemini": []})
120
  round_state = gr.State(0)
121
 
122
- prompt = gr.Textbox(label="Your prompt", lines=3)
123
  gen_btn = gr.Button("Generate answers")
124
  status = gr.Textbox(label="Status", interactive=False)
125
 
126
  with gr.Row():
127
- out_c = gr.Textbox(label="ChatGPT", interactive=False, lines=8)
128
- out_d = gr.Textbox(label="DeepSeek", interactive=False, lines=8)
129
- out_g = gr.Textbox(label="Gemini", interactive=False, lines=8)
130
 
 
131
  with gr.Row():
132
- rate_c = gr.Slider(1, 5, step=1, label="Rate ChatGPT")
133
- rate_d = gr.Slider(1, 5, step=1, label="Rate DeepSeek")
134
- rate_g = gr.Slider(1, 5, step=1, label="Rate Gemini")
135
 
136
  submit_btn = gr.Button("Submit ratings")
137
  summary = gr.Textbox(label="Final ranking", interactive=False, lines=8)
@@ -139,13 +129,13 @@ with gr.Blocks() as demo:
139
  gen_btn.click(
140
  fn=generate_answers,
141
  inputs=[prompt, round_state],
142
- outputs=[status, out_c, out_d, out_g, round_state],
143
  )
144
 
145
  submit_btn.click(
146
  fn=submit_ratings,
147
- inputs=[rate_c, rate_d, rate_g, scores, round_state],
148
- outputs=[status, scores, round_state, summary],
149
  )
150
 
151
  if __name__ == "__main__":
 
1
 
2
  import os
 
3
  import gradio as gr
4
+ from groq import Groq
5
 
6
  MAX_ROUNDS = 5
7
 
def get_groq_client():
    """Build a Groq client from the ``GROQ_API_KEY`` environment variable.

    Returns:
        A ``(client, error)`` pair. On success ``client`` is a ``Groq``
        instance and ``error`` is ``None``. On failure ``client`` is ``None``
        and ``error`` is a human-readable message that callers can show
        directly in the UI.
    """
    # Values pasted into a secrets form can carry stray whitespace or a
    # trailing newline; strip it so a blank value is reported as missing
    # instead of being handed to the SDK as a bogus key.
    api_key = (os.getenv("GROQ_API_KEY") or "").strip()
    if not api_key:
        return None, "Error: GROQ_API_KEY is not set. Please configure it in your environment or Hugging Face Space secrets."
    try:
        return Groq(api_key=api_key), None
    except Exception as e:
        # Deliberately broad: any construction failure is reported as text
        # rather than raised, matching the (client, error) contract.
        return None, f"Error creating Groq client: {e}"
18
 
def call_groq_model(model_id: str, prompt: str) -> str:
    """Send ``prompt`` as a single user message to ``model_id`` and return the reply.

    Args:
        model_id: Identifier of the Groq-hosted model to query.
        prompt: Text sent as the only ``user`` message.

    Returns:
        The text of the first completion choice, or an ``"Error ..."``
        message if the client could not be created or the request failed.
        This function does not raise; failures are returned as text so the
        caller can display them next to the other models' answers.
    """
    client, err = get_groq_client()
    if err is not None:
        return err

    try:
        completion = client.chat.completions.create(
            model=model_id,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=512,
        )
        content = completion.choices[0].message.content
    except Exception as e:
        return f"Error calling Groq model {model_id}: {e}"

    # NOTE(review): the message content appears to be optional in the
    # chat-completions schema; normalise to "" so the declared ``str``
    # return type always holds.
    return content if content is not None else ""
32
 
33
+ # Model IDs hosted on Groq (you can change these as you like)
34
+ MODEL_A = "llama3-8b-8192"
35
+ MODEL_B = "llama3-70b-8192"
36
+ MODEL_C = "gemma-7b-it"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
def generate_answers(prompt, round_num):
    """Query the three configured models for the current round.

    Args:
        prompt: The user's prompt text; ``None`` or blank is rejected.
        round_num: Number of rounds already rated; ``None`` is treated as 0.

    Returns:
        A 5-tuple ``(status, answer_a, answer_b, answer_c, round_num)``.
        When all rounds are already done, or the prompt is empty, the three
        answers are empty strings and only ``status`` carries a message.
        ``round_num`` is passed back unchanged (after the ``None`` -> 0
        normalisation); this function never advances the round.
    """
    if round_num is None:
        round_num = 0

    if round_num >= MAX_ROUNDS:
        return f"You already completed {MAX_ROUNDS} rounds.", "", "", "", round_num

    if not prompt or not prompt.strip():
        return "Enter a prompt first.", "", "", "", round_num

    ans_a = call_groq_model(MODEL_A, prompt)
    ans_b = call_groq_model(MODEL_B, prompt)
    ans_c = call_groq_model(MODEL_C, prompt)

    # The range separator is a real en dash (U+2013); the previous text held
    # its mis-decoded byte sequence, which rendered as garbage in the UI.
    status = f"Round {round_num + 1} of {MAX_ROUNDS}: Rate each model 1–5."
    return status, ans_a, ans_b, ans_c, round_num
 
54
 
def _rank_models(scores, names):
    """Return one ranking line per model, highest average rating first."""
    def agg(name):
        arr = scores[name]
        total = sum(arr)
        # Guard the division so a model with no ratings cannot raise.
        avg = total / len(arr) if arr else 0.0
        return total, avg

    results = {m: agg(m) for m in names}
    # sorted() is stable, so ties keep the A, B, C order.
    ranking = sorted(names, key=lambda m: results[m][1], reverse=True)
    return [
        f"{i}. {m}: total={results[m][0]}, avg={results[m][1]:.2f}"
        for i, m in enumerate(ranking, 1)
    ]


def submit_ratings(r_a, r_b, r_c, scores, round_num):
    """Record one round of ratings and, after the last round, rank the models.

    Args:
        r_a, r_b, r_c: Ratings for Model A, B and C; each must convert to an
            integer between 1 and 5.
        scores: Mapping of model label to the list of ratings so far. ``None``
            or a non-dict is replaced by empty lists. The input is not mutated.
        round_num: Number of rounds already rated; ``None`` is treated as 0.

    Returns:
        A 4-tuple ``(status, scores, round_num, summary)``. ``summary`` is an
        empty string until ``MAX_ROUNDS`` rounds have been rated, then it
        holds the final ranking. Once the evaluation is complete, further
        submissions are ignored and the same ranking is returned again.
    """
    names = ("Model A", "Model B", "Model C")

    if not isinstance(scores, dict):
        scores = {}
    # Work on a copy so the caller's state object is never modified in place,
    # and so a missing label cannot raise KeyError below.
    scores = {name: list(scores.get(name) or []) for name in names}

    if round_num is None:
        round_num = 0

    def build_summary():
        lines = [f"Final ranking after {MAX_ROUNDS} rounds:"]
        lines.extend(_rank_models(scores, names))
        return "\n".join(lines)

    # Without this guard every extra click would append another rating and
    # push the round counter past MAX_ROUNDS.
    if round_num >= MAX_ROUNDS:
        return "Evaluation already complete.", scores, round_num, build_summary()

    ratings = []
    for label, r in zip(names, (r_a, r_b, r_c)):
        if r is None:
            return f"Missing rating for {label}.", scores, round_num, ""
        try:
            value = int(r)
        except (TypeError, ValueError):
            return f"Rating for {label} must be 1–5.", scores, round_num, ""
        if not 1 <= value <= 5:
            return f"Rating for {label} must be 1–5.", scores, round_num, ""
        ratings.append(value)

    # Only store anything once all three ratings have been validated.
    for name, value in zip(names, ratings):
        scores[name].append(value)

    next_round = round_num + 1
    if next_round < MAX_ROUNDS:
        return (
            f"Ratings saved for round {next_round}. Enter a new prompt for the next round.",
            scores,
            next_round,
            "",
        )

    return "Evaluation complete.", scores, next_round, build_summary()
99
 
100
  with gr.Blocks() as demo:
101
+ gr.Markdown("# Groq AI Model Evaluator")
102
+ gr.Markdown(
103
+ "This app compares three different Groq-hosted models (Model A, Model B, Model C). "
104
+ "For each of 5 rounds, enter a prompt, see three answers, rate each 1–5, "
105
+ "and then see the final ranking based on your scores."
106
+ )
107
 
108
+ scores_state = gr.State({"Model A": [], "Model B": [], "Model C": []})
109
  round_state = gr.State(0)
110
 
111
+ prompt = gr.Textbox(label="Your prompt", lines=3, placeholder="Ask anything you like...")
112
  gen_btn = gr.Button("Generate answers")
113
  status = gr.Textbox(label="Status", interactive=False)
114
 
115
  with gr.Row():
116
+ out_a = gr.Textbox(label=f"Model A ({MODEL_A})", interactive=False, lines=8)
117
+ out_b = gr.Textbox(label=f"Model B ({MODEL_B})", interactive=False, lines=8)
118
+ out_c = gr.Textbox(label=f"Model C ({MODEL_C})", interactive=False, lines=8)
119
 
120
+ gr.Markdown("### Rate each model this round (1 = poor, 5 = excellent)")
121
  with gr.Row():
122
+ rate_a = gr.Slider(1, 5, step=1, label="Rate Model A", value=3)
123
+ rate_b = gr.Slider(1, 5, step=1, label="Rate Model B", value=3)
124
+ rate_c = gr.Slider(1, 5, step=1, label="Rate Model C", value=3)
125
 
126
  submit_btn = gr.Button("Submit ratings")
127
  summary = gr.Textbox(label="Final ranking", interactive=False, lines=8)
 
129
  gen_btn.click(
130
  fn=generate_answers,
131
  inputs=[prompt, round_state],
132
+ outputs=[status, out_a, out_b, out_c, round_state],
133
  )
134
 
135
  submit_btn.click(
136
  fn=submit_ratings,
137
+ inputs=[rate_a, rate_b, rate_c, scores_state, round_state],
138
+ outputs=[status, scores_state, round_state, summary],
139
  )
140
 
141
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,5 +1,2 @@
1
-
2
  gradio>=4.0.0
3
- openai>=1.6.0
4
- google-generativeai>=0.8.0
5
- requests>=2.31.0
 
 
1
  gradio>=4.0.0
2
+ groq>=0.9.0