eduard76 committed on
Commit
2329c05
·
verified ·
1 Parent(s): b6e9c46

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +270 -0
app.py ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import threading
import time

import anthropic
import gradio as gr
import openai
7
+
8
# --- API Keys ---
# SECURITY: credentials must never be committed to source control. Every
# provider key is read from an environment variable instead (for a hosted
# deployment, configure them as deployment secrets). Any key that was ever
# hardcoded in a published revision of this file must be treated as leaked:
# revoke it and issue a new one with the provider.
# A provider whose variable is unset maps to None.
API_KEYS = {
    "openai_api_key": os.environ.get("OPENAI_API_KEY"),
    "anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "deepseek_api_key": os.environ.get("DEEPSEEK_API_KEY"),
    "google_api_key": os.environ.get("GOOGLE_API_KEY"),
    "groq_api_key": os.environ.get("GROQ_API_KEY"),
    "ollama_api_key": "ollama",  # Static placeholder key for local Ollama
}
18
+
19
# --- Model & API Configuration ---
# One entry per competitor. Fields:
#   name       - model identifier sent to the provider
#   api_client - which client code path get_model_response uses
#   base_url   - endpoint for the OpenAI-compatible / Ollama clients
#   key_name   - which entry of API_KEYS holds the credential
COMPETITOR_MODELS = [
    dict(name="gpt-4o-mini", api_client="openai", key_name="openai_api_key"),
    dict(name="claude-sonnet-4-20250514", api_client="anthropic", key_name="anthropic_api_key"),
    dict(
        name="deepseek-chat",
        api_client="openai_compatible",
        base_url="https://api.deepseek.com/v1",
        key_name="deepseek_api_key",
    ),
    # A smaller Llama 3 model served by Groq, chosen for speed.
    dict(
        name="llama3-8b-8192",
        api_client="openai_compatible",
        base_url="https://api.groq.com/openai/v1",
        key_name="groq_api_key",
    ),
    # Local Ollama server; requires the model to be pulled first
    # ('ollama pull llama3').
    dict(
        name="llama3",
        api_client="ollama",
        base_url="http://localhost:11434/v1",
        key_name="ollama_api_key",
    ),
    # Gemini, reached through an OpenAI-compatible configuration.
    dict(
        name="gemini-2.0-flash",
        api_client="openai_compatible",
        base_url="https://generativelanguage.googleapis.com/v1beta/openai/",
        key_name="google_api_key",
    ),
]
58
# --- UI Configuration ---
# Accent colour per competitor, in COMPETITOR_MODELS order. Consumers index
# this list modulo its length, so colours repeat if more models are added.
MODEL_COLORS = [
    "#FF6347",
    "#4682B4",
    "#32CD32",
    "#FFD700",
    "#6A5ACD",
    "#00CED1",
]

# Model used to rank the competitors' answers.
JUDGE_MODEL = "o3-mini"
62
+
63
# --- Helper Function to Query APIs ---
def get_model_response(model_config, api_keys, prompt, results_list):
    """Query one LLM and append its answer (or an error message) to a list.

    Any exception raised while preparing or performing the provider call is
    caught and reported as an ``"Error for <model>: ..."`` string, so a single
    failing provider does not prevent the result entry from being recorded.

    Args:
        model_config: Dict with "name" (model identifier sent to the API),
            "api_client" ("openai", "anthropic", "openai_compatible" or
            "ollama"), "key_name" (entry to look up in ``api_keys``) and, for
            the OpenAI-compatible and Ollama clients, "base_url".
        api_keys: Mapping from key name to API key.
        prompt: Text sent as a single user message.
        results_list: List that receives one
            ``{"model": <name>, "response": <text>}`` dict.

    Returns:
        None. The outcome is appended to ``results_list``.
    """
    model_name = model_config["name"]
    api_client_type = model_config["api_client"]
    api_key = api_keys.get(model_config["key_name"])

    # Reported unchanged when api_client_type matches none of the branches.
    response_content = f"Error: Model {model_name} did not respond."

    try:
        # The "ollama" client type is exempt from the key requirement.
        if not api_key and api_client_type != "ollama":
            raise ValueError("API key is missing.")

        messages = [{"role": "user", "content": prompt}]

        if api_client_type == "openai":
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model=model_name, messages=messages)
            response_content = response.choices[0].message.content

        elif api_client_type == "anthropic":
            client = anthropic.Anthropic(api_key=api_key)
            response = client.messages.create(model=model_name, max_tokens=2048, messages=messages)
            response_content = response.content[0].text

        elif api_client_type in ("openai_compatible", "ollama"):
            # Every such provider is addressed the same way: the OpenAI client
            # pointed at the configured base URL. No per-provider URL
            # construction is needed, so all of them share this one path.
            client = openai.OpenAI(api_key=api_key, base_url=model_config.get("base_url", ""))
            response = client.chat.completions.create(model=model_name, messages=messages)
            response_content = response.choices[0].message.content

    except Exception as e:
        response_content = f"Error for {model_name}: {str(e)}"

    results_list.append({"model": model_name, "response": response_content})
110
+
111
# --- Main Logic for the Arena (as a Generator) ---
def _winner_index(ranked_indices, competitor_count):
    """Return the 0-based index of the judge's top-ranked competitor.

    The judge numbers competitors from 1. Raises ValueError when the top entry
    is not an integer in ``1..competitor_count``; without this check a value
    such as 0 would become index -1 and silently select the wrong entry.
    """
    top_entry = ranked_indices[0]
    index = int(top_entry) - 1
    if not 0 <= index < competitor_count:
        raise ValueError(f"Judge returned an out-of-range competitor number: {top_entry!r}")
    return index


def run_competition(question, progress=gr.Progress(track_tqdm=True)):
    """Run one arena round, yielding UI updates after each stage.

    Every yielded list has the shape
    ``[button update, best-answer markdown, response text per competitor]``,
    with competitors in ``COMPETITOR_MODELS`` order.

    Args:
        question: The user's question. Empty or whitespace-only input resets
            the UI without calling any model.
        progress: Gradio progress tracker supplied through the default
            argument.

    Yields:
        The "running" state, then the competitors' responses while the judge
        works, then the final state with the winning answer (or an error
        message) and the button re-enabled.
    """
    competitor_count = len(COMPETITOR_MODELS)
    pending_best_answer = "The winning answer will be displayed here..."

    # Nothing to ask: clear the UI and re-enable the button without spending
    # any API calls.
    if not question or not question.strip():
        yield [gr.Button("Run Competition", interactive=True)] + [""] * (1 + competitor_count)
        return

    # --- Stage 1: Initial UI State ---
    # Disable the button and show "Thinking..." in every competitor box.
    yield (
        [gr.Button("⚙️ Running Competition...", interactive=False)]
        + [pending_best_answer]
        + ["⏳ Thinking..."] * competitor_count
    )

    # --- Stage 2: Get Competitor Responses Concurrently ---
    progress(0, desc="Querying Competitor Models...")
    competitor_responses = []  # Populated by the worker threads
    threads = [
        threading.Thread(
            target=get_model_response,
            args=(model_config, API_KEYS, question, competitor_responses),
        )
        for model_config in COMPETITOR_MODELS
    ]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # --- Stage 3: Update UI with Competitor Responses ---
    progress(0.7, desc="All models responded. Awaiting judgment...")

    # Threads finish in arbitrary order, so look responses up by model name to
    # lay them out in the fixed UI order.
    response_dict = {r['model']: r['response'] for r in competitor_responses}
    text_outputs = [pending_best_answer]  # Best answer is still pending
    judge_sections = []
    for i, model_config in enumerate(COMPETITOR_MODELS):
        response = response_dict.get(model_config['name'], f"Error: {model_config['name']} response not found.")
        text_outputs.append(response)
        judge_sections.append(f"# Response from competitor {i+1} ({model_config['name']})\n\n{response}\n\n")
    responses_text_for_judge = "".join(judge_sections)

    yield [gr.Button("⚖️ Judging...", interactive=False)] + text_outputs
    time.sleep(1)  # Small delay for better UX

    # --- Stage 4: Get the Judge's Ranking ---
    judge_prompt = f"""You are a fair and impartial judge in a competition between {len(competitor_responses)} LLM assistants.
Each model was given this question:
---
{question}
---
Your task is to evaluate each response for clarity, accuracy, and depth of reasoning. Then, you must rank them in order from best to worst.
You must respond with JSON, and only JSON, with the following format:
{{"results": ["best competitor number", "second best competitor number", ...]}}

Here are the responses from each competitor:
---
{responses_text_for_judge}
---
Now, provide your judgment as a JSON object with the ranked order of the competitors. Do not include any other text, markdown formatting, or code blocks."""

    best_answer_text = "Error: Judge failed to provide a valid ranking."
    try:
        judge_client = openai.OpenAI(api_key=API_KEYS["openai_api_key"])
        response = judge_client.chat.completions.create(
            model=JUDGE_MODEL,
            messages=[{"role": "user", "content": judge_prompt}],
            response_format={"type": "json_object"},
        )

        results_dict = json.loads(response.choices[0].message.content)
        ranked_indices = results_dict.get("results", [])

        if ranked_indices:
            best_competitor_num = _winner_index(ranked_indices, competitor_count)
            best_model_name = COMPETITOR_MODELS[best_competitor_num]['name']
            best_model_color = MODEL_COLORS[best_competitor_num % len(MODEL_COLORS)]
            # +1 skips the best-answer slot at index 0 of text_outputs.
            best_answer = text_outputs[best_competitor_num + 1]
            best_answer_text = f"## 🏆 Best Answer (from <span style='color:{best_model_color}; font-weight:bold;'>{best_model_name}</span>)\n\n"
            best_answer_text += best_answer

    except Exception as e:
        best_answer_text = f"## Error\n\nAn error occurred during judgment: {str(e)}"

    # --- Stage 5: Final UI Update ---
    progress(1, desc="Competition Complete!")
    text_outputs[0] = best_answer_text
    yield [gr.Button("Run Competition", interactive=True)] + text_outputs
213
+
214
+
215
# --- Gradio User Interface ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue")) as demo:
    gr.Markdown("# Advanced Multi-Model LLM Arena")

    # --- Top Half of the Screen ---
    with gr.Row():
        with gr.Column(scale=1):
            question_box = gr.Textbox(
                label="Enter Your Question Here",
                lines=6,
                placeholder="e.g., Explain the concept of emergent properties in complex systems and provide three distinct examples."
            )
            run_button = gr.Button("Run Competition", variant="primary")
            # No progress object is created in the layout: run_competition
            # receives its tracker through its `progress` default argument.

        with gr.Column(scale=2):
            best_answer_box = gr.Markdown("The winning answer will be displayed here...")

    gr.Markdown("---")
    gr.Markdown("### Competitor Responses")

    # --- Bottom Half of the Screen ---
    # One read-only textbox per competitor, laid out three per row.
    # response_boxes keeps COMPETITOR_MODELS order, which is the order in
    # which run_competition yields the responses.
    response_boxes = []
    models_per_row = 3
    for row_start in range(0, len(COMPETITOR_MODELS), models_per_row):
        with gr.Row():
            row_models = COMPETITOR_MODELS[row_start:row_start + models_per_row]
            for model_index, model_config in enumerate(row_models, start=row_start):
                with gr.Column():
                    # Cycle through the palette if there are more models than colours.
                    color = MODEL_COLORS[model_index % len(MODEL_COLORS)]

                    # Styled heading acts as the label; the textbox has none.
                    gr.Markdown(f"<h3 style='color:{color}; margin-bottom: -10px; text-align:center;'>{model_config['name']}</h3>")
                    response_boxes.append(gr.Textbox(lines=10, interactive=False))

    # --- Connect the Button to the Logic ---
    # The button is itself an output so the handler can relabel and
    # disable/enable it while a competition is running.
    all_outputs = [run_button, best_answer_box] + response_boxes

    run_button.click(
        fn=run_competition,
        inputs=[question_box],
        outputs=all_outputs
    )

if __name__ == "__main__":
    demo.launch(debug=True)