eduard76 committed on
Commit
b6e9c46
·
verified ·
1 Parent(s): 57a5c88

Delete app3.py

Browse files
Files changed (1) hide show
  1. app3.py +0 -271
app3.py DELETED
@@ -1,271 +0,0 @@
1
import json
import os
import threading
import time

import anthropic
import gradio as gr
import openai
7
-
8
-
9
# --- API Keys ---
# SECURITY FIX: the keys were previously hardcoded in this file, which leaks
# live credentials through version control (this file's deletion commit still
# exposes them — the leaked keys must be rotated/revoked regardless).
# Keys are now read from the environment; an unset variable resolves to None,
# which get_model_response() reports as a missing-key error at request time.
API_KEYS = {
    "openai_api_key": os.environ.get("OPENAI_API_KEY"),
    "anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "deepseek_api_key": os.environ.get("DEEPSEEK_API_KEY"),
    "google_api_key": os.environ.get("GOOGLE_API_KEY"),
    "groq_api_key": os.environ.get("GROQ_API_KEY"),
    "ollama_api_key": "ollama",  # Static placeholder key for local Ollama
}
19
-
20
# --- Model & API Configuration ---
# Each entry describes one competitor model:
#   name       - model identifier passed to the provider's API
#   api_client - which client branch get_model_response() uses:
#                "openai", "anthropic", "openai_compatible", or "ollama"
#   base_url   - endpoint for OpenAI-compatible providers (absent for native clients)
#   key_name   - which entry of API_KEYS holds the credential
COMPETITOR_MODELS = [
    {
        "name": "gpt-4o-mini",
        "api_client": "openai",
        "key_name": "openai_api_key"
    },
    {
        "name": "claude-sonnet-4-20250514",  # Corrected model name
        "api_client": "anthropic",
        "key_name": "anthropic_api_key"
    },
    {
        "name": "deepseek-chat",
        "api_client": "openai_compatible",
        "base_url": "https://api.deepseek.com/v1",
        "key_name": "deepseek_api_key"
    },
    {
        "name": "llama3-8b-8192",  # Using a smaller Llama3 model on Groq for speed
        "api_client": "openai_compatible",
        "base_url": "https://api.groq.com/openai/v1",
        "key_name": "groq_api_key"
    },
    {
        "name": "llama3",  # Ensure you have 'llama3' pulled via 'ollama pull llama3'
        "api_client": "ollama",
        "base_url": "http://localhost:11434/v1",
        "key_name": "ollama_api_key"
    },
    {
        # Gemini via Google's OpenAI-compatible endpoint
        "name": "gemini-2.0-flash",
        "api_client": "openai_compatible",
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
        "key_name": "google_api_key"
    }
]
# --- UI Configuration ---
# One color per competitor box; cycled with modulo if there are more models
# than colors (see the UI builder and the best-answer highlight).
MODEL_COLORS = ["#FF6347", "#4682B4", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
# Model used to rank the competitors' answers (queried via the OpenAI client).
JUDGE_MODEL = "o3-mini"
63
-
64
# --- Helper Function to Query APIs ---
def get_model_response(model_config: dict, api_keys: dict, prompt: str, results_list: list) -> None:
    """Query one LLM and append its answer to `results_list`.

    Designed to run inside a worker thread: it never raises — every failure
    is converted into an error string so one bad provider cannot kill the
    competition. `list.append` is atomic in CPython, so concurrent workers
    can safely share `results_list`.

    Args:
        model_config: One entry of COMPETITOR_MODELS (keys: "name",
            "api_client", "key_name", optionally "base_url").
        api_keys: Mapping of key names to credentials (see API_KEYS).
        prompt: The user's question, sent as a single user message.
        results_list: Shared list receiving {"model": ..., "response": ...}.
    """
    model_name = model_config["name"]
    api_client_type = model_config["api_client"]
    api_key = api_keys.get(model_config["key_name"])

    response_content = f"Error: Model {model_name} did not respond."

    try:
        # Ollama ignores the key, so only it may proceed without one.
        if not api_key and api_client_type != "ollama":
            raise ValueError("API key is missing.")

        messages = [{"role": "user", "content": prompt}]

        if api_client_type == "openai":
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model=model_name, messages=messages)
            response_content = response.choices[0].message.content

        elif api_client_type == "anthropic":
            client = anthropic.Anthropic(api_key=api_key)
            # Anthropic requires max_tokens; content is a list of blocks.
            response = client.messages.create(model=model_name, max_tokens=2048, messages=messages)
            response_content = response.content[0].text

        elif api_client_type in ("openai_compatible", "ollama"):
            # All OpenAI-compatible providers (DeepSeek, Groq, Ollama, and
            # Google's /v1beta/openai/ endpoint) take the same call shape —
            # the previous special-cased Google branch built an unused URL
            # and then did exactly this, so the branches are unified.
            client = openai.OpenAI(api_key=api_key, base_url=model_config.get("base_url", ""))
            response = client.chat.completions.create(model=model_name, messages=messages)
            response_content = response.choices[0].message.content

    except Exception as e:
        # Report the failure in-band; threads must not propagate exceptions.
        response_content = f"Error for {model_name}: {str(e)}"

    results_list.append({"model": model_name, "response": response_content})
111
-
112
# --- Main Logic for the Arena (as a Generator) ---
def run_competition(question: str, progress=gr.Progress(track_tqdm=True)):
    """Run the full competition and stream UI updates as a generator.

    Each `yield` emits one list matching `all_outputs` in the UI wiring:
    [run_button update, best-answer markdown, one textbox per competitor].
    Stages: disable the button -> query all competitors concurrently ->
    show their answers -> ask the judge model to rank them -> show the
    winner and re-enable the button.

    Args:
        question: The user's prompt; an empty value aborts with a cleared UI.
        progress: Gradio progress tracker (injected by gradio at call time).
    """
    # --- Stage 1: Initial UI State ---
    # Disable button and set "Thinking..." message for all competitor boxes.
    button_update_running = gr.Button("⚙️ Running Competition...", interactive=False)
    initial_text_outputs = ["The winning answer will be displayed here..."] + ["⏳ Thinking..."] * len(COMPETITOR_MODELS)
    yield [button_update_running] + initial_text_outputs

    if not question:
        # If the question is empty, clear the UI and re-enable the button.
        button_update_idle = gr.Button("Run Competition", interactive=True)
        blank_outputs = [""] * (1 + len(COMPETITOR_MODELS))
        yield [button_update_idle] + blank_outputs
        return

    # --- Stage 2: Get Competitor Responses Concurrently ---
    # One thread per model; the workers append to the shared list
    # (get_model_response never raises, so join() is always reached).
    progress(0, desc="Querying Competitor Models...")
    threads = []
    competitor_responses = []  # This list will be populated by the threads
    for model_config in COMPETITOR_MODELS:
        thread = threading.Thread(
            target=get_model_response,
            args=(model_config, API_KEYS, question, competitor_responses)
        )
        threads.append(thread)
        thread.start()

    # Wait for all threads to complete
    for thread in threads:
        thread.join()

    # --- Stage 3: Update UI with Competitor Responses ---
    progress(0.7, desc="All models responded. Awaiting judgment...")
    button_update_judging = gr.Button("⚖️ Judging...", interactive=False)

    # Prepare the text outputs for the UI boxes; index 0 is the best-answer
    # slot, indices 1..N follow COMPETITOR_MODELS order (threads finish in
    # arbitrary order, so responses are re-ordered via this dict lookup).
    text_outputs = ["The winning answer will be displayed here..."]  # Best answer is still pending
    response_dict = {r['model']: r['response'] for r in competitor_responses}
    responses_text_for_judge = ""

    # Fill the output list in the correct UI order; competitors are numbered
    # 1-based in the judge transcript so the judge's ranking maps back below.
    for i, model_config in enumerate(COMPETITOR_MODELS):
        response = response_dict.get(model_config['name'], f"Error: {model_config['name']} response not found.")
        text_outputs.append(response)
        responses_text_for_judge += f"# Response from competitor {i+1} ({model_config['name']})\n\n{response}\n\n"

    yield [button_update_judging] + text_outputs
    time.sleep(1)  # Small delay for better UX

    # --- Stage 4: Get the Judge's Ranking ---
    # The judge must answer with strict JSON: {"results": [best, second, ...]}
    # where each entry is a 1-based competitor number.
    judge_prompt = f"""You are a fair and impartial judge in a competition between {len(competitor_responses)} LLM assistants.
Each model was given this question:
---
{question}
---
Your task is to evaluate each response for clarity, accuracy, and depth of reasoning. Then, you must rank them in order from best to worst.
You must respond with JSON, and only JSON, with the following format:
{{"results": ["best competitor number", "second best competitor number", ...]}}

Here are the responses from each competitor:
---
{responses_text_for_judge}
---
Now, provide your judgment as a JSON object with the ranked order of the competitors. Do not include any other text, markdown formatting, or code blocks."""

    best_answer_text = "Error: Judge failed to provide a valid ranking."
    try:
        judge_client = openai.OpenAI(api_key=API_KEYS["openai_api_key"])
        judge_messages = [{"role": "user", "content": judge_prompt}]

        # response_format forces JSON-mode output from the judge model.
        response = judge_client.chat.completions.create(
            model=JUDGE_MODEL,
            messages=judge_messages,
            response_format={"type": "json_object"}
        )

        results_json = response.choices[0].message.content
        results_dict = json.loads(results_json)
        ranked_indices = results_dict.get("results", [])

        if ranked_indices:
            # Find the best answer based on the judge's ranking
            # (convert the 1-based competitor number back to a 0-based index).
            best_competitor_num = int(ranked_indices[0]) - 1
            # The model name and response are retrieved from the ordered `text_outputs` list
            best_model_name = COMPETITOR_MODELS[best_competitor_num]['name']
            best_model_color = MODEL_COLORS[best_competitor_num % len(MODEL_COLORS)]
            best_answer = text_outputs[best_competitor_num + 1]  # +1 to account for best_answer_box at index 0
            best_answer_text = f"## 🏆 Best Answer (from <span style='color:{best_model_color}; font-weight:bold;'>{best_model_name}</span>)\n\n"
            best_answer_text += best_answer

    except Exception as e:
        # Any judge failure (network, bad JSON, out-of-range index) lands here.
        best_answer_text = f"## Error\n\nAn error occurred during judgment: {str(e)}"

    # --- Stage 5: Final UI Update ---
    progress(1, desc="Competition Complete!")
    button_update_idle = gr.Button("Run Competition", interactive=True)
    text_outputs[0] = best_answer_text  # Add the final best answer to our output list
    yield [button_update_idle] + text_outputs
214
-
215
-
216
# --- Gradio User Interface ---
# Layout: question input + run button on the left, best-answer panel on the
# right, then a grid of competitor response boxes (3 per row) underneath.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue")) as demo:
    gr.Markdown("# Advanced Multi-Model LLM Arena")

    # --- Top Half of the Screen ---
    with gr.Row():
        with gr.Column(scale=1):
            question_box = gr.Textbox(
                label="Enter Your Question Here",
                lines=6,
                placeholder="e.g., Explain the concept of emergent properties in complex systems and provide three distinct examples."
            )
            run_button = gr.Button("Run Competition", variant="primary")
            # gr.Progress takes no 'label' argument; progress text is driven
            # by the `progress` parameter inside run_competition.
            progress_bar = gr.Progress()

        with gr.Column(scale=2):
            best_answer_box = gr.Markdown("The winning answer will be displayed here...")

    gr.Markdown("---")
    gr.Markdown("### Competitor Responses")

    # --- Bottom Half of the Screen ---
    # response_boxes is built in COMPETITOR_MODELS order so it lines up with
    # the text_outputs list yielded by run_competition.
    response_boxes = []
    # Create rows with 3 models each
    for i in range(0, len(COMPETITOR_MODELS), 3):
        with gr.Row():
            # Create a column for each model in the row
            for j in range(3):
                model_index = i + j
                if model_index < len(COMPETITOR_MODELS):
                    with gr.Column():
                        model_config = COMPETITOR_MODELS[model_index]
                        model_name = model_config['name']
                        # Assign color from the list, cycling through if necessary
                        color = MODEL_COLORS[model_index % len(MODEL_COLORS)]

                        # Styled Markdown for the label
                        gr.Markdown(f"<h3 style='color:{color}; margin-bottom: -10px; text-align:center;'>{model_name}</h3>")

                        # Textbox for the response, no label needed here
                        box = gr.Textbox(lines=10, interactive=False)
                        response_boxes.append(box)

    # --- Connect the Button to the Logic ---
    # The button itself is an output component so run_competition can
    # disable/re-enable it via the yielded gr.Button updates.
    all_outputs = [run_button, best_answer_box] + response_boxes

    run_button.click(
        fn=run_competition,
        inputs=[question_box],
        outputs=all_outputs
    )

if __name__ == "__main__":
    demo.launch(debug=True)