eduard76 committed on
Commit
57a5c88
·
verified ·
1 Parent(s): 640e8f7

Update app3.py

Browse files
Files changed (1) hide show
  1. app3.py +271 -270
app3.py CHANGED
@@ -1,270 +1,271 @@
1
- import gradio as gr
2
- import openai
3
- import anthropic
4
- import threading
5
- import json
6
- import time
7
-
8
import os

# --- API Keys ---
# Credentials are read from environment variables so that no secret is
# stored in the source. A provider whose variable is unset maps to None.
API_KEYS = {
    "openai_api_key": os.environ.get("OPENAI_API_KEY"),
    "anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "deepseek_api_key": os.environ.get("DEEPSEEK_API_KEY"),
    "google_api_key": os.environ.get("GOOGLE_API_KEY"),
    "groq_api_key": os.environ.get("GROQ_API_KEY"),
    "ollama_api_key": "ollama",  # Static placeholder for local Ollama, not a secret
}
18
-
19
- # --- Model & API Configuration ---
20
- # This configuration is based on your reference notebook.
21
- COMPETITOR_MODELS = [
22
- {
23
- "name": "gpt-4o-mini",
24
- "api_client": "openai",
25
- "key_name": "openai_api_key"
26
- },
27
- {
28
- "name": "claude-sonnet-4-20250514", # Corrected model name
29
- "api_client": "anthropic",
30
- "key_name": "anthropic_api_key"
31
- },
32
- {
33
- "name": "deepseek-chat",
34
- "api_client": "openai_compatible",
35
- "base_url": "https://api.deepseek.com/v1",
36
- "key_name": "deepseek_api_key"
37
- },
38
- {
39
- "name": "llama3-8b-8192", # Using a smaller Llama3 model on Groq for speed
40
- "api_client": "openai_compatible",
41
- "base_url": "https://api.groq.com/openai/v1",
42
- "key_name": "groq_api_key"
43
- },
44
- {
45
- "name": "llama3", # Ensure you have 'llama3' pulled via 'ollama pull llama3'
46
- "api_client": "ollama",
47
- "base_url": "http://localhost:11434/v1",
48
- "key_name": "ollama_api_key"
49
- },
50
- {
51
- # Re-integrating Gemini with a standard OpenAI-compatible configuration
52
- "name": "gemini-2.0-flash",
53
- "api_client": "openai_compatible",
54
- "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
55
- "key_name": "google_api_key"
56
- }
57
- ]
58
- # --- UI Configuration ---
59
- # FIX: This line was likely missing in your local file, causing the NameError.
60
- MODEL_COLORS = ["#FF6347", "#4682B4", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
61
- JUDGE_MODEL = "o3-mini" # Corrected judge model name
62
-
63
- # --- Helper Function to Query APIs ---
64
- def get_model_response(model_config, api_keys, prompt, results_list):
65
- """
66
- Queries an LLM API based on the provided configuration and appends the result to a list.
67
- """
68
- model_name = model_config["name"]
69
- api_client_type = model_config["api_client"]
70
- api_key = api_keys.get(model_config["key_name"])
71
-
72
- response_content = f"Error: Model {model_name} did not respond."
73
-
74
- try:
75
- if not api_key and api_client_type != "ollama":
76
- raise ValueError("API key is missing.")
77
-
78
- messages = [{"role": "user", "content": prompt}]
79
-
80
- if api_client_type == "openai":
81
- client = openai.OpenAI(api_key=api_key)
82
- response = client.chat.completions.create(model=model_name, messages=messages)
83
- response_content = response.choices[0].message.content
84
-
85
- elif api_client_type == "anthropic":
86
- client = anthropic.Anthropic(api_key=api_key)
87
- response = client.messages.create(model=model_name, max_tokens=2048, messages=messages)
88
- response_content = response.content[0].text
89
-
90
- elif api_client_type in ["openai_compatible", "ollama"]:
91
- # For Google's endpoint, the model name is part of the path, so we construct the URL here.
92
- base_url = model_config.get("base_url", "")
93
- if "googleapis.com" in base_url:
94
- full_url = f"{base_url}/models/{model_config['name']}:generateContent"
95
- # This is a simplified example; a real implementation would use Google's own client library
96
- # or handle the different API structure. For now, we'll try the OpenAI client.
97
- client = openai.OpenAI(api_key=api_key, base_url=base_url)
98
- # The model name for the client needs to be just the model identifier
99
- response = client.chat.completions.create(model=model_config['name'], messages=messages)
100
- else:
101
- client = openai.OpenAI(api_key=api_key, base_url=base_url)
102
- response = client.chat.completions.create(model=model_name, messages=messages)
103
-
104
- response_content = response.choices[0].message.content
105
-
106
- except Exception as e:
107
- response_content = f"Error for {model_name}: {str(e)}"
108
-
109
- results_list.append({"model": model_name, "response": response_content})
110
-
111
- # --- Main Logic for the Arena (as a Generator) ---
112
- def run_competition(question, progress=gr.Progress(track_tqdm=True)):
113
- """
114
- A generator function that runs the competition and yields UI updates at each stage,
115
- including the state of the button.
116
- """
117
- # --- Stage 1: Initial UI State ---
118
- # Disable button and set "Thinking..." message for all competitor boxes
119
- button_update_running = gr.Button("⚙️ Running Competition...", interactive=False)
120
- initial_text_outputs = ["The winning answer will be displayed here..."] + ["⏳ Thinking..."] * len(COMPETITOR_MODELS)
121
- yield [button_update_running] + initial_text_outputs
122
-
123
- if not question:
124
- # If the question is empty, clear the UI and re-enable the button.
125
- button_update_idle = gr.Button("Run Competition", interactive=True)
126
- blank_outputs = [""] * (1 + len(COMPETITOR_MODELS))
127
- yield [button_update_idle] + blank_outputs
128
- return
129
-
130
- # --- Stage 2: Get Competitor Responses Concurrently ---
131
- progress(0, desc="Querying Competitor Models...")
132
- threads = []
133
- competitor_responses = [] # This list will be populated by the threads
134
- for model_config in COMPETITOR_MODELS:
135
- thread = threading.Thread(
136
- target=get_model_response,
137
- args=(model_config, API_KEYS, question, competitor_responses)
138
- )
139
- threads.append(thread)
140
- thread.start()
141
-
142
- # Wait for all threads to complete
143
- for thread in threads:
144
- thread.join()
145
-
146
- # --- Stage 3: Update UI with Competitor Responses ---
147
- progress(0.7, desc="All models responded. Awaiting judgment...")
148
- button_update_judging = gr.Button("⚖️ Judging...", interactive=False)
149
-
150
- # Prepare the text outputs for the UI boxes
151
- text_outputs = ["The winning answer will be displayed here..."] # Best answer is still pending
152
- response_dict = {r['model']: r['response'] for r in competitor_responses}
153
- responses_text_for_judge = ""
154
-
155
- # Fill the output list in the correct UI order
156
- for i, model_config in enumerate(COMPETITOR_MODELS):
157
- response = response_dict.get(model_config['name'], f"Error: {model_config['name']} response not found.")
158
- text_outputs.append(response)
159
- responses_text_for_judge += f"# Response from competitor {i+1} ({model_config['name']})\n\n{response}\n\n"
160
-
161
- yield [button_update_judging] + text_outputs
162
- time.sleep(1) # Small delay for better UX
163
-
164
- # --- Stage 4: Get the Judge's Ranking ---
165
- judge_prompt = f"""You are a fair and impartial judge in a competition between {len(competitor_responses)} LLM assistants.
166
- Each model was given this question:
167
- ---
168
- {question}
169
- ---
170
- Your task is to evaluate each response for clarity, accuracy, and depth of reasoning. Then, you must rank them in order from best to worst.
171
- You must respond with JSON, and only JSON, with the following format:
172
- {{"results": ["best competitor number", "second best competitor number", ...]}}
173
-
174
- Here are the responses from each competitor:
175
- ---
176
- {responses_text_for_judge}
177
- ---
178
- Now, provide your judgment as a JSON object with the ranked order of the competitors. Do not include any other text, markdown formatting, or code blocks."""
179
-
180
- best_answer_text = "Error: Judge failed to provide a valid ranking."
181
- try:
182
- judge_client = openai.OpenAI(api_key=API_KEYS["openai_api_key"])
183
- judge_messages = [{"role": "user", "content": judge_prompt}]
184
-
185
- response = judge_client.chat.completions.create(
186
- model=JUDGE_MODEL,
187
- messages=judge_messages,
188
- response_format={"type": "json_object"}
189
- )
190
-
191
- results_json = response.choices[0].message.content
192
- results_dict = json.loads(results_json)
193
- ranked_indices = results_dict.get("results", [])
194
-
195
- if ranked_indices:
196
- # Find the best answer based on the judge's ranking
197
- best_competitor_num = int(ranked_indices[0]) - 1
198
- # The model name and response are retrieved from the ordered `text_outputs` list
199
- best_model_name = COMPETITOR_MODELS[best_competitor_num]['name']
200
- best_model_color = MODEL_COLORS[best_competitor_num % len(MODEL_COLORS)]
201
- best_answer = text_outputs[best_competitor_num + 1] # +1 to account for best_answer_box at index 0
202
- best_answer_text = f"## 🏆 Best Answer (from <span style='color:{best_model_color}; font-weight:bold;'>{best_model_name}</span>)\n\n"
203
- best_answer_text += best_answer
204
-
205
- except Exception as e:
206
- best_answer_text = f"## Error\n\nAn error occurred during judgment: {str(e)}"
207
-
208
- # --- Stage 5: Final UI Update ---
209
- progress(1, desc="Competition Complete!")
210
- button_update_idle = gr.Button("Run Competition", interactive=True)
211
- text_outputs[0] = best_answer_text # Add the final best answer to our output list
212
- yield [button_update_idle] + text_outputs
213
-
214
-
215
- # --- Gradio User Interface ---
216
- with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue")) as demo:
217
- gr.Markdown("# Advanced Multi-Model LLM Arena")
218
-
219
- # --- Top Half of the Screen ---
220
- with gr.Row():
221
- with gr.Column(scale=1):
222
- question_box = gr.Textbox(
223
- label="Enter Your Question Here",
224
- lines=6,
225
- placeholder="e.g., Explain the concept of emergent properties in complex systems and provide three distinct examples."
226
- )
227
- run_button = gr.Button("Run Competition", variant="primary")
228
- # FIX: Removed the 'label' argument from gr.Progress
229
- progress_bar = gr.Progress()
230
-
231
- with gr.Column(scale=2):
232
- best_answer_box = gr.Markdown("The winning answer will be displayed here...")
233
-
234
- gr.Markdown("---")
235
- gr.Markdown("### Competitor Responses")
236
-
237
- # --- Bottom Half of the Screen ---
238
- response_boxes = []
239
- # Create rows with 3 models each
240
- for i in range(0, len(COMPETITOR_MODELS), 3):
241
- with gr.Row():
242
- # Create a column for each model in the row
243
- for j in range(3):
244
- model_index = i + j
245
- if model_index < len(COMPETITOR_MODELS):
246
- with gr.Column():
247
- model_config = COMPETITOR_MODELS[model_index]
248
- model_name = model_config['name']
249
- # Assign color from the list, cycling through if necessary
250
- color = MODEL_COLORS[model_index % len(MODEL_COLORS)]
251
-
252
- # Styled Markdown for the label
253
- gr.Markdown(f"<h3 style='color:{color}; margin-bottom: -10px; text-align:center;'>{model_name}</h3>")
254
-
255
- # Textbox for the response, no label needed here
256
- box = gr.Textbox(lines=10, interactive=False)
257
- response_boxes.append(box)
258
-
259
- # --- Connect the Button to the Logic ---
260
- # The button itself is now an output component that gets updated.
261
- all_outputs = [run_button, best_answer_box] + response_boxes
262
-
263
- run_button.click(
264
- fn=run_competition,
265
- inputs=[question_box],
266
- outputs=all_outputs
267
- )
268
-
269
- if __name__ == "__main__":
270
- demo.launch(debug=True)
 
 
1
+ import gradio as gr
2
+ import openai
3
+ import anthropic
4
+ import threading
5
+ import json
6
+ import time
7
+
8
+
9
import os

# --- API Keys ---
# Credentials are read from environment variables so that no secret is
# stored in the source. A provider whose variable is unset maps to None.
API_KEYS = {
    "openai_api_key": os.environ.get("OPENAI_API_KEY"),
    "anthropic_api_key": os.environ.get("ANTHROPIC_API_KEY"),
    "deepseek_api_key": os.environ.get("DEEPSEEK_API_KEY"),
    "google_api_key": os.environ.get("GOOGLE_API_KEY"),
    "groq_api_key": os.environ.get("GROQ_API_KEY"),
    "ollama_api_key": "ollama",  # Static placeholder for local Ollama, not a secret
}

# --- Model & API Configuration ---
# One entry per competitor. "api_client" names the client code path used to
# query the model, "key_name" is the API_KEYS entry holding its credential,
# and "base_url" is given for the OpenAI-compatible and Ollama endpoints.
COMPETITOR_MODELS = [
    {
        "name": "gpt-4o-mini",
        "api_client": "openai",
        "key_name": "openai_api_key",
    },
    {
        "name": "claude-sonnet-4-20250514",
        "api_client": "anthropic",
        "key_name": "anthropic_api_key",
    },
    {
        "name": "deepseek-chat",
        "api_client": "openai_compatible",
        "base_url": "https://api.deepseek.com/v1",
        "key_name": "deepseek_api_key",
    },
    {
        "name": "llama3-8b-8192",  # Smaller Llama 3 model on Groq, chosen for speed
        "api_client": "openai_compatible",
        "base_url": "https://api.groq.com/openai/v1",
        "key_name": "groq_api_key",
    },
    {
        "name": "llama3",  # Ensure you have 'llama3' pulled via 'ollama pull llama3'
        "api_client": "ollama",
        "base_url": "http://localhost:11434/v1",
        "key_name": "ollama_api_key",
    },
    {
        # Gemini is reached through Google's OpenAI-compatible endpoint.
        "name": "gemini-2.0-flash",
        "api_client": "openai_compatible",
        "base_url": "https://generativelanguage.googleapis.com/v1beta/openai/",
        "key_name": "google_api_key",
    },
]

# --- UI Configuration ---
# One accent colour per competitor, indexed by position in COMPETITOR_MODELS.
MODEL_COLORS = ["#FF6347", "#4682B4", "#32CD32", "#FFD700", "#6A5ACD", "#00CED1"]
# Model that ranks the competitors' answers.
JUDGE_MODEL = "o3-mini"
63
+
64
# --- Helper Function to Query APIs ---
def get_model_response(model_config, api_keys, prompt, results_list):
    """Query one LLM and append its answer, or an error message, to a list.

    Args:
        model_config: Dict with "name", "api_client" ("openai", "anthropic",
            "openai_compatible" or "ollama") and "key_name". "base_url" is
            read for the "openai_compatible" and "ollama" client types.
        api_keys: Mapping from key name to API key.
        prompt: Text sent to the model as a single user message.
        results_list: List that receives one
            ``{"model": <name>, "response": <text>}`` dict.

    Returns:
        None. The outcome is reported only through ``results_list``.

    Any exception raised while validating the key or calling the provider is
    caught and recorded as an ``"Error for <name>: ..."`` response instead of
    propagating, so a failing provider still produces an entry.
    """
    model_name = model_config["name"]
    api_client_type = model_config["api_client"]
    api_key = api_keys.get(model_config["key_name"])

    try:
        # Ollama is the only client type allowed to run without a key.
        if not api_key and api_client_type != "ollama":
            raise ValueError("API key is missing.")

        messages = [{"role": "user", "content": prompt}]

        if api_client_type == "anthropic":
            client = anthropic.Anthropic(api_key=api_key)
            response = client.messages.create(
                model=model_name, max_tokens=2048, messages=messages
            )
            response_content = response.content[0].text
        elif api_client_type == "openai":
            client = openai.OpenAI(api_key=api_key)
            response = client.chat.completions.create(model=model_name, messages=messages)
            response_content = response.choices[0].message.content
        elif api_client_type in ("openai_compatible", "ollama"):
            # Every OpenAI-compatible endpoint is called the same way; only
            # the base URL differs between providers.
            client = openai.OpenAI(
                api_key=api_key, base_url=model_config.get("base_url", "")
            )
            response = client.chat.completions.create(model=model_name, messages=messages)
            response_content = response.choices[0].message.content
        else:
            raise ValueError(f"Unsupported api_client type: {api_client_type!r}")

    except Exception as e:
        response_content = f"Error for {model_name}: {str(e)}"

    results_list.append({"model": model_name, "response": response_content})
111
+
112
# --- Main Logic for the Arena (as a Generator) ---
def run_competition(question, progress=gr.Progress(track_tqdm=True)):
    """Run one arena round and yield a UI update after each stage.

    Every yielded list is ordered as: the run button, the best-answer text,
    then one response text per entry of COMPETITOR_MODELS (in that order).

    Args:
        question: The user's question. An empty or whitespace-only value
            clears the outputs and ends the round without querying any model.
        progress: Gradio progress tracker; called with a fraction and a
            description at the start of each stage.

    Yields:
        list: The button update followed by ``1 + len(COMPETITOR_MODELS)``
        strings.

    Failures while judging are not raised; they are shown as an error
    message in the best-answer slot.
    """
    placeholder = "The winning answer will be displayed here..."
    model_count = len(COMPETITOR_MODELS)

    # --- Stage 1: Initial UI State ---
    # Disable the button and show a waiting message in every competitor box.
    yield (
        [gr.Button("⚙️ Running Competition...", interactive=False), placeholder]
        + ["⏳ Thinking..."] * model_count
    )

    if not question or not question.strip():
        # Nothing to ask: clear the UI and re-enable the button.
        yield [gr.Button("Run Competition", interactive=True)] + [""] * (1 + model_count)
        return

    # --- Stage 2: Get Competitor Responses Concurrently ---
    progress(0, desc="Querying Competitor Models...")
    competitor_responses = []  # Filled in by the worker threads
    workers = [
        threading.Thread(
            target=get_model_response,
            args=(model_config, API_KEYS, question, competitor_responses),
        )
        for model_config in COMPETITOR_MODELS
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # --- Stage 3: Update UI with Competitor Responses ---
    progress(0.7, desc="All models responded. Awaiting judgment...")

    # Threads finish in arbitrary order, so re-order the answers to match the
    # UI boxes (which follow COMPETITOR_MODELS).
    responses_by_model = {r["model"]: r["response"] for r in competitor_responses}
    ordered_responses = [
        responses_by_model.get(cfg["name"], f"Error: {cfg['name']} response not found.")
        for cfg in COMPETITOR_MODELS
    ]
    text_outputs = [placeholder] + ordered_responses  # Best answer is still pending
    responses_text_for_judge = "".join(
        f"# Response from competitor {number} ({cfg['name']})\n\n{answer}\n\n"
        for number, (cfg, answer) in enumerate(
            zip(COMPETITOR_MODELS, ordered_responses), start=1
        )
    )

    yield [gr.Button("⚖️ Judging...", interactive=False)] + text_outputs
    time.sleep(1)  # Small delay for better UX

    # --- Stage 4: Get the Judge's Ranking ---
    # The competitor count matches the numbered list above, which always has
    # one entry per configured model.
    judge_prompt = f"""You are a fair and impartial judge in a competition between {model_count} LLM assistants.
Each model was given this question:
---
{question}
---
Your task is to evaluate each response for clarity, accuracy, and depth of reasoning. Then, you must rank them in order from best to worst.
You must respond with JSON, and only JSON, with the following format:
{{"results": ["best competitor number", "second best competitor number", ...]}}

Here are the responses from each competitor:
---
{responses_text_for_judge}
---
Now, provide your judgment as a JSON object with the ranked order of the competitors. Do not include any other text, markdown formatting, or code blocks."""

    best_answer_text = "Error: Judge failed to provide a valid ranking."
    try:
        judge_client = openai.OpenAI(api_key=API_KEYS["openai_api_key"])
        response = judge_client.chat.completions.create(
            model=JUDGE_MODEL,
            messages=[{"role": "user", "content": judge_prompt}],
            response_format={"type": "json_object"},
        )

        results_dict = json.loads(response.choices[0].message.content)
        ranked_indices = results_dict.get("results", [])

        if ranked_indices:
            # Competitor numbers are 1-based in the prompt.
            winner_idx = int(ranked_indices[0]) - 1
            # Reject 0, negative and too-large numbers explicitly; otherwise
            # negative indexing would silently pick the wrong competitor.
            if not 0 <= winner_idx < model_count:
                raise ValueError(
                    f"Judge returned an out-of-range competitor number: {ranked_indices[0]!r}"
                )
            best_model_name = COMPETITOR_MODELS[winner_idx]['name']
            best_model_color = MODEL_COLORS[winner_idx % len(MODEL_COLORS)]
            best_answer_text = f"## 🏆 Best Answer (from <span style='color:{best_model_color}; font-weight:bold;'>{best_model_name}</span>)\n\n"
            best_answer_text += ordered_responses[winner_idx]

    except Exception as e:
        best_answer_text = f"## Error\n\nAn error occurred during judgment: {str(e)}"

    # --- Stage 5: Final UI Update ---
    progress(1, desc="Competition Complete!")
    text_outputs[0] = best_answer_text  # Replace the placeholder with the verdict
    yield [gr.Button("Run Competition", interactive=True)] + text_outputs
214
+
215
+
216
# --- Gradio User Interface ---
with gr.Blocks(theme=gr.themes.Soft(primary_hue="orange", secondary_hue="blue")) as demo:
    gr.Markdown("# Advanced Multi-Model LLM Arena")

    # --- Top Half of the Screen: question input (left), winning answer (right) ---
    with gr.Row():
        with gr.Column(scale=1):
            question_box = gr.Textbox(
                label="Enter Your Question Here",
                lines=6,
                placeholder="e.g., Explain the concept of emergent properties in complex systems and provide three distinct examples."
            )
            run_button = gr.Button("Run Competition", variant="primary")
            # No progress object is created in the layout: run_competition
            # receives its gr.Progress tracker through its default argument.

        with gr.Column(scale=2):
            best_answer_box = gr.Markdown("The winning answer will be displayed here...")

    gr.Markdown("---")
    gr.Markdown("### Competitor Responses")

    # --- Bottom Half of the Screen: one read-only box per competitor ---
    response_boxes = []
    # Lay the competitors out three to a row; the last row may be shorter.
    for row_start in range(0, len(COMPETITOR_MODELS), 3):
        with gr.Row():
            row_end = min(row_start + 3, len(COMPETITOR_MODELS))
            for model_index in range(row_start, row_end):
                with gr.Column():
                    model_config = COMPETITOR_MODELS[model_index]
                    model_name = model_config['name']
                    # Cycle through the palette if there are more models than colours.
                    color = MODEL_COLORS[model_index % len(MODEL_COLORS)]

                    # Coloured heading acts as the label for the box below it.
                    gr.Markdown(f"<h3 style='color:{color}; margin-bottom: -10px; text-align:center;'>{model_name}</h3>")

                    box = gr.Textbox(lines=10, interactive=False)
                    response_boxes.append(box)

    # --- Connect the Button to the Logic ---
    # The button is itself an output so the handler can relabel and disable
    # it while a round is running. The order here must match the lists that
    # run_competition yields.
    all_outputs = [run_button, best_answer_box] + response_boxes

    run_button.click(
        fn=run_competition,
        inputs=[question_box],
        outputs=all_outputs
    )

if __name__ == "__main__":
    demo.launch(debug=True)