eduard76 committed
Commit d052a06 · verified · 1 Parent(s): 2329c05

Update app.py

Files changed (1):
  1. app.py +58 -70
app.py CHANGED
@@ -1,23 +1,27 @@
 import gradio as gr
 import openai
 import anthropic
+import google.generativeai as genai
 import threading
 import json
 import time
+import os
 
-# --- Hardcoded API Keys ---
-# As requested, the API keys are now part of the script.
+# --- Securely Load API Keys from Environment Variables ---
+# IMPORTANT: Set these keys in your system's environment variables
+# or create a .env file and use a library like 'python-dotenv' to load them.
 API_KEYS = {
-    "openai_api_key": "sk-proj-[REDACTED]",
-    "anthropic_api_key": "sk-ant-[REDACTED]",
-    "deepseek_api_key": "sk-[REDACTED]",
-    "google_api_key": "[REDACTED]",
-    "groq_api_key": "gsk_[REDACTED]",
-    "ollama_api_key": "ollama"  # Static key for local Ollama
+    "openai_api_key": os.getenv("OPENAI_API_KEY"),
+    "anthropic_api_key": os.getenv("ANTHROPIC_API_KEY"),
+    "deepseek_api_key": os.getenv("DEEPSEEK_API_KEY"),
+    "google_api_key": os.getenv("GOOGLE_API_KEY"),
+    "groq_api_key": os.getenv("GROQ_API_KEY"),
+    "ollama_api_key": "ollama"  # Static key for local Ollama
 }
 
 # --- Model & API Configuration ---
-# This configuration is based on your reference notebook.
+# FIX: Corrected model names for Claude, Gemini, and the Judge model.
+# FIX: Reconfigured Gemini to use its own 'gemini' api_client.
 COMPETITOR_MODELS = [
     {
         "name": "gpt-4o-mini",
@@ -25,7 +29,7 @@ COMPETITOR_MODELS = [
         "key_name": "openai_api_key"
     },
     {
-        "name": "claude-sonnet-4-20250514",  # Corrected model name
+        "name": "claude-3-5-sonnet-20240620",  # CORRECTED model name
         "api_client": "anthropic",
         "key_name": "anthropic_api_key"
     },
@@ -36,29 +40,26 @@ COMPETITOR_MODELS = [
         "key_name": "deepseek_api_key"
     },
     {
-        "name": "llama3-8b-8192",  # Using a smaller Llama3 model on Groq for speed
+        "name": "llama3-8b-8192",
         "api_client": "openai_compatible",
         "base_url": "https://api.groq.com/openai/v1",
         "key_name": "groq_api_key"
     },
     {
-        "name": "llama3",  # Ensure you have 'llama3' pulled via 'ollama pull llama3'
+        "name": "llama3",  # Ensure you have 'llama3' pulled via 'ollama pull llama3'
        "api_client": "ollama",
         "base_url": "http://localhost:11434/v1",
         "key_name": "ollama_api_key"
     },
     {
-        # Re-integrating Gemini with a standard OpenAI-compatible configuration
-        "name": "gemini-2.0-flash",
-        "api_client": "openai_compatible",
-        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
+        "name": "gemini-1.5-flash-latest",  # CORRECTED model name
+        "api_client": "gemini",  # CORRECTED client type
         "key_name": "google_api_key"
     }
 ]
 # --- UI Configuration ---
-# FIX: This line was likely missing in your local file, causing the NameError.
-MODEL_COLORS = ["#FF6347", "#4682B4", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
-JUDGE_MODEL = "o3-mini"  # Corrected judge model name
+MODEL_COLORS = ["#FF6347", "#D2691E", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
+JUDGE_MODEL = "gpt-4o-mini"  # CORRECTED judge model name
 
 # --- Helper Function to Query APIs ---
 def get_model_response(model_config, api_keys, prompt, results_list):
@@ -73,7 +74,7 @@ def get_model_response(model_config, api_keys, prompt, results_list):
 
     try:
         if not api_key and api_client_type != "ollama":
-            raise ValueError("API key is missing.")
+            raise ValueError(f"API key '{model_config['key_name']}' is missing.")
 
         messages = [{"role": "user", "content": prompt}]
 
@@ -84,23 +85,20 @@ def get_model_response(model_config, api_keys, prompt, results_list):
 
        elif api_client_type == "anthropic":
             client = anthropic.Anthropic(api_key=api_key)
-            response = client.messages.create(model=model_name, max_tokens=2048, messages=messages)
+            response = client.messages.create(model=model_name, max_tokens=4096, messages=messages)
             response_content = response.content[0].text
+
+        # FIX: Added a dedicated block for the Gemini API
+        elif api_client_type == "gemini":
+            genai.configure(api_key=api_key)
+            model = genai.GenerativeModel(model_name)
+            response = model.generate_content(prompt)
+            response_content = response.text
 
         elif api_client_type in ["openai_compatible", "ollama"]:
-            # For Google's endpoint, the model name is part of the path, so we construct the URL here.
-            base_url = model_config.get("base_url", "")
-            if "googleapis.com" in base_url:
-                full_url = f"{base_url}/models/{model_config['name']}:generateContent"
-                # This is a simplified example; a real implementation would use Google's own client library
-                # or handle the different API structure. For now, we'll try the OpenAI client.
-                client = openai.OpenAI(api_key=api_key, base_url=base_url)
-                # The model name for the client needs to be just the model identifier
-                response = client.chat.completions.create(model=model_config['name'], messages=messages)
-            else:
-                client = openai.OpenAI(api_key=api_key, base_url=base_url)
-                response = client.chat.completions.create(model=model_name, messages=messages)
-
+            base_url = model_config.get("base_url")
+            client = openai.OpenAI(api_key=api_key, base_url=base_url)
+            response = client.chat.completions.create(model=model_name, messages=messages)
             response_content = response.choices[0].message.content
 
     except Exception as e:
@@ -111,26 +109,23 @@
 # --- Main Logic for the Arena (as a Generator) ---
 def run_competition(question, progress=gr.Progress(track_tqdm=True)):
     """
-    A generator function that runs the competition and yields UI updates at each stage,
-    including the state of the button.
+    A generator function that runs the competition and yields UI updates at each stage.
     """
-    # --- Stage 1: Initial UI State ---
-    # Disable button and set "Thinking..." message for all competitor boxes
+    # Stage 1: Initial UI State
     button_update_running = gr.Button("⚙️ Running Competition...", interactive=False)
     initial_text_outputs = ["The winning answer will be displayed here..."] + ["⏳ Thinking..."] * len(COMPETITOR_MODELS)
     yield [button_update_running] + initial_text_outputs
 
     if not question:
-        # If the question is empty, clear the UI and re-enable the button.
         button_update_idle = gr.Button("Run Competition", interactive=True)
         blank_outputs = [""] * (1 + len(COMPETITOR_MODELS))
         yield [button_update_idle] + blank_outputs
         return
 
-    # --- Stage 2: Get Competitor Responses Concurrently ---
+    # Stage 2: Get Competitor Responses Concurrently
     progress(0, desc="Querying Competitor Models...")
     threads = []
-    competitor_responses = []  # This list will be populated by the threads
+    competitor_responses = []
     for model_config in COMPETITOR_MODELS:
         thread = threading.Thread(
             target=get_model_response,
@@ -139,29 +134,26 @@ def run_competition(question, progress=gr.Progress(track_tqdm=True)):
         threads.append(thread)
         thread.start()
 
-    # Wait for all threads to complete
     for thread in threads:
         thread.join()
 
-    # --- Stage 3: Update UI with Competitor Responses ---
+    # Stage 3: Update UI with Competitor Responses
     progress(0.7, desc="All models responded. Awaiting judgment...")
     button_update_judging = gr.Button("⚖️ Judging...", interactive=False)
 
-    # Prepare the text outputs for the UI boxes
-    text_outputs = ["The winning answer will be displayed here..."]  # Best answer is still pending
+    text_outputs = ["The winning answer will be displayed here..."]
     response_dict = {r['model']: r['response'] for r in competitor_responses}
     responses_text_for_judge = ""
 
-    # Fill the output list in the correct UI order
     for i, model_config in enumerate(COMPETITOR_MODELS):
         response = response_dict.get(model_config['name'], f"Error: {model_config['name']} response not found.")
         text_outputs.append(response)
         responses_text_for_judge += f"# Response from competitor {i+1} ({model_config['name']})\n\n{response}\n\n"
 
     yield [button_update_judging] + text_outputs
-    time.sleep(1)  # Small delay for better UX
+    time.sleep(1)
 
-    # --- Stage 4: Get the Judge's Ranking ---
+    # Stage 4: Get the Judge's Ranking
     judge_prompt = f"""You are a fair and impartial judge in a competition between {len(competitor_responses)} LLM assistants.
 Each model was given this question:
 ---
@@ -179,6 +171,10 @@ Now, provide your judgment as a JSON object with the ranked order of the competi
 
     best_answer_text = "Error: Judge failed to provide a valid ranking."
     try:
+        # Ensure the OpenAI API key is available for the judge
+        if not API_KEYS["openai_api_key"]:
+            raise ValueError("OpenAI API key is missing for the judge model.")
+
         judge_client = openai.OpenAI(api_key=API_KEYS["openai_api_key"])
         judge_messages = [{"role": "user", "content": judge_prompt}]
 
@@ -190,25 +186,27 @@ Now, provide your judgment as a JSON object with the ranked order of the competi
 
         results_json = response.choices[0].message.content
         results_dict = json.loads(results_json)
-        ranked_indices = results_dict.get("results", [])
+        # Handle potential string or integer values from the judge model
+        ranked_indices = [str(i) for i in results_dict.get("results", [])]
 
         if ranked_indices:
-            # Find the best answer based on the judge's ranking
-            best_competitor_num = int(ranked_indices[0]) - 1
-            # The model name and response are retrieved from the ordered `text_outputs` list
-            best_model_name = COMPETITOR_MODELS[best_competitor_num]['name']
-            best_model_color = MODEL_COLORS[best_competitor_num % len(MODEL_COLORS)]
-            best_answer = text_outputs[best_competitor_num + 1]  # +1 to account for best_answer_box at index 0
+            best_competitor_num_str = ranked_indices[0]
+            best_competitor_index = int(best_competitor_num_str) - 1
+
+            best_model_name = COMPETITOR_MODELS[best_competitor_index]['name']
+            best_model_color = MODEL_COLORS[best_competitor_index % len(MODEL_COLORS)]
+            best_answer = text_outputs[best_competitor_index + 1]
+
             best_answer_text = f"## 🏆 Best Answer (from <span style='color:{best_model_color}; font-weight:bold;'>{best_model_name}</span>)\n\n"
             best_answer_text += best_answer
 
     except Exception as e:
         best_answer_text = f"## Error\n\nAn error occurred during judgment: {str(e)}"
 
-    # --- Stage 5: Final UI Update ---
+    # Stage 5: Final UI Update
     progress(1, desc="Competition Complete!")
     button_update_idle = gr.Button("Run Competition", interactive=True)
-    text_outputs[0] = best_answer_text  # Add the final best answer to our output list
+    text_outputs[0] = best_answer_text
     yield [button_update_idle] + text_outputs
 
 
@@ -216,7 +214,6 @@ Now, provide your judgment as a JSON object with the ranked order of the competi
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue")) as demo:
     gr.Markdown("# Advanced Multi-Model LLM Arena")
 
-    # --- Top Half of the Screen ---
     with gr.Row():
         with gr.Column(scale=1):
             question_box = gr.Textbox(
@@ -225,8 +222,7 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue"))
                 placeholder="e.g., Explain the concept of emergent properties in complex systems and provide three distinct examples."
             )
             run_button = gr.Button("Run Competition", variant="primary")
-            # FIX: Removed the 'label' argument from gr.Progress
-            progress_bar = gr.Progress()
+            progress_bar = gr.Progress()  # This component is controlled by the `gr.Progress` in the function
 
         with gr.Column(scale=2):
             best_answer_box = gr.Markdown("The winning answer will be displayed here...")
@@ -234,30 +230,22 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue"))
     gr.Markdown("---")
     gr.Markdown("### Competitor Responses")
 
-    # --- Bottom Half of the Screen ---
     response_boxes = []
-    # Create rows with 3 models each
     for i in range(0, len(COMPETITOR_MODELS), 3):
         with gr.Row():
-            # Create a column for each model in the row
             for j in range(3):
                 model_index = i + j
                 if model_index < len(COMPETITOR_MODELS):
                     with gr.Column():
                         model_config = COMPETITOR_MODELS[model_index]
                         model_name = model_config['name']
-                        # Assign color from the list, cycling through if necessary
                        color = MODEL_COLORS[model_index % len(MODEL_COLORS)]
 
-                        # Styled Markdown for the label
                         gr.Markdown(f"<h3 style='color:{color}; margin-bottom: -10px; text-align:center;'>{model_name}</h3>")
 
-                        # Textbox for the response, no label needed here
-                        box = gr.Textbox(lines=10, interactive=False)
+                        box = gr.Textbox(lines=10, interactive=False, container=False)
                         response_boxes.append(box)
 
-    # --- Connect the Button to the Logic ---
-    # The button itself is now an output component that gets updated.
     all_outputs = [run_button, best_answer_box] + response_boxes
 
    run_button.click(
@@ -267,4 +255,4 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue"))
     )
 
 if __name__ == "__main__":
-    demo.launch(debug=True)
+    demo.launch(debug=True)
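
The updated app.py reads every key with os.getenv, and its new comment suggests optionally loading a .env file with python-dotenv. A minimal sketch of that optional setup is below; the .env contents, the early exit, and where load_dotenv() is called are illustrative assumptions, not part of this commit:

# Optional: populate os.environ from a local .env file before app.py builds API_KEYS.
# Assumes `pip install python-dotenv` and a .env file next to app.py, e.g.:
#   OPENAI_API_KEY=...
#   ANTHROPIC_API_KEY=...
#   DEEPSEEK_API_KEY=...
#   GOOGLE_API_KEY=...
#   GROQ_API_KEY=...
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env; by default it does not override variables already set in the environment

# The judge always uses the OpenAI key, so failing early here is a reasonable (hypothetical) guard.
if not os.getenv("OPENAI_API_KEY"):
    raise SystemExit("OPENAI_API_KEY is not set; the judge model cannot run.")

With the variables exported (or a .env file in place), running `python app.py` launches the Gradio demo as before via demo.launch(debug=True).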