Mehedi2 committed on
Commit
352cf41
·
verified ·
1 Parent(s): cceff53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +216 -390
app.py CHANGED
@@ -2,10 +2,12 @@ import os
2
  import requests
3
  import json
4
  import gradio as gr
5
- from typing import Dict, Any, Optional
 
6
 
7
- # Set your OpenRouter API key as environment variable
8
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
 
9
 
10
  class OpenRouterLLM:
11
  def __init__(self, api_key: str, model: str = "deepseek/deepseek-v3.1-terminus"):
@@ -13,17 +15,17 @@ class OpenRouterLLM:
13
  self.model = model
14
  self.base_url = "https://openrouter.ai/api/v1/chat/completions"
15
 
16
- def __call__(self, prompt: str, max_tokens: int = 1000, temperature: float = 0.3) -> str:
17
- """Make API call to OpenRouter with DeepSeek V3.1 Terminus"""
18
 
19
  if not self.api_key or not self.api_key.startswith('sk-or-v1-'):
20
- return "Error: Invalid OpenRouter API key. Please configure your API key."
21
 
22
  headers = {
23
  "Authorization": f"Bearer {self.api_key}",
24
  "Content-Type": "application/json",
25
  "HTTP-Referer": "https://huggingface.co/spaces/Mehedi2/Gaia-Test-Agent",
26
- "X-Title": "AI Navigation Agent"
27
  }
28
 
29
  payload = {
@@ -31,7 +33,12 @@ class OpenRouterLLM:
31
  "messages": [
32
  {
33
  "role": "system",
34
- "content": "You are a helpful AI assistant. Answer questions clearly and accurately."
 
 
 
 
 
35
  },
36
  {
37
  "role": "user",
@@ -51,432 +58,261 @@ class OpenRouterLLM:
51
  timeout=30
52
  )
53
 
54
- if response.status_code == 401:
55
- return "Error: Invalid API key or unauthorized."
56
- elif response.status_code == 402:
57
- return "Error: Insufficient credits in OpenRouter account."
58
- elif response.status_code == 429:
59
- return "Error: Rate limit exceeded. Please wait and try again."
60
- elif response.status_code != 200:
61
- return f"Error: HTTP {response.status_code} - {response.text[:200]}"
62
 
63
  result = response.json()
64
 
65
  if "choices" in result and len(result["choices"]) > 0:
66
  return result["choices"][0]["message"]["content"].strip()
67
  else:
68
- return "Error: No response content received."
69
-
70
- except requests.exceptions.Timeout:
71
- return "Error: Request timeout. Please try again."
72
- except requests.exceptions.RequestException as e:
73
- return f"Error calling OpenRouter API: {str(e)}"
74
  except Exception as e:
75
  return f"Error: {str(e)}"
76
 
77
- def run_agent(prompt: str) -> str:
78
- """
79
- Main function for GAIA evaluation
80
- Takes any text prompt and returns a response
81
- """
82
- try:
83
- # Check if API key is available
84
- if not OPENROUTER_API_KEY:
85
- return "Error: No API key configured. Please set OPENROUTER_API_KEY environment variable."
86
-
87
- # Initialize the LLM
88
- llm = OpenRouterLLM(api_key=OPENROUTER_API_KEY, model="deepseek/deepseek-v3.1-terminus")
89
-
90
- # Check if this is a navigation-related query
91
- navigation_keywords = ['route', 'navigation', 'direction', 'coordinate', 'latitude', 'longitude', 'drive', 'travel']
92
- if any(keyword in prompt.lower() for keyword in navigation_keywords):
93
- # Try to extract coordinates or provide navigation guidance
94
  enhanced_prompt = f"""
95
- You are a navigation assistant. The user asked: "{prompt}"
96
 
97
- If they provided coordinates, help them with navigation. If not, ask for specific locations or coordinates.
98
- Provide helpful navigation-related information.
99
- """
100
- else:
101
- # General AI assistant prompt
102
- enhanced_prompt = f"""
103
- You are a helpful AI assistant. Please answer the following question accurately and thoroughly:
104
 
105
- {prompt}
 
 
 
106
 
107
- Provide a clear, factual response based on your knowledge.
108
- """
109
-
110
- # Get response from LLM
111
- response = llm(enhanced_prompt, max_tokens=1500, temperature=0.3)
112
- return response
113
-
114
- except Exception as e:
115
- return f"Error processing request: {str(e)}"
116
 
117
- def fetch_route_from_osrm(origin: str, destination: str) -> str:
118
- """Fetch route from OSRM API"""
 
 
 
 
 
 
119
 
120
- try:
121
- # Validate coordinates
122
- origin_parts = origin.split(',')
123
- dest_parts = destination.split(',')
124
 
125
- if len(origin_parts) != 2 or len(dest_parts) != 2:
126
- return "Error: Coordinates must be in 'longitude,latitude' format"
 
 
 
127
 
128
- # Parse coordinates
129
- float(origin_parts[0]), float(origin_parts[1])
130
- float(dest_parts[0]), float(dest_parts[1])
131
 
132
- except (ValueError, IndexError):
133
- return "Error: Invalid coordinate format"
 
 
 
134
 
135
- url = f"http://router.project-osrm.org/route/v1/driving/{origin};{destination}"
136
- params = {
137
- "overview": "false",
138
- "steps": "true",
139
- "geometries": "geojson"
140
- }
 
 
 
 
141
 
142
- try:
143
- response = requests.get(url, params=params, timeout=15)
144
- response.raise_for_status()
145
- data = response.json()
146
-
147
- if not data.get("routes") or len(data["routes"]) == 0:
148
- return "No route found between the specified locations."
149
-
150
- route = data["routes"][0]
151
- total_distance_km = route.get("distance", 0) / 1000
152
- total_duration_min = route.get("duration", 0) / 60
153
-
154
- # Process turn-by-turn instructions
155
- instructions = []
156
- step_number = 1
157
-
158
- for leg in route["legs"]:
159
- for step in leg["steps"]:
160
- maneuver = step.get("maneuver", {})
161
- step_type = maneuver.get("type", "continue")
162
- modifier = maneuver.get("modifier", "")
163
- road_name = step.get("name", "")
164
- distance_m = step.get("distance", 0)
165
-
166
- if distance_m < 10:
167
- continue
168
-
169
- instruction = f"{step_number}. "
170
-
171
- if step_type == "depart":
172
- direction = "Start your journey"
173
- if modifier:
174
- direction += f" heading {modifier}"
175
- if road_name:
176
- direction += f" on {road_name}"
177
-
178
- elif step_type == "arrive":
179
- instruction += "You have arrived at your destination!"
180
- instructions.append(instruction)
181
- break
182
-
183
- elif step_type == "turn":
184
- direction = f"Turn {modifier}" if modifier else "Turn"
185
- if road_name:
186
- direction += f" onto {road_name}"
187
-
188
- elif step_type == "merge":
189
- direction = f"Merge {modifier}" if modifier else "Merge"
190
- if road_name:
191
- direction += f" onto {road_name}"
192
-
193
- elif step_type == "continue":
194
- direction = "Continue straight"
195
- if road_name:
196
- direction += f" on {road_name}"
197
-
198
- else:
199
- direction = f"{step_type.replace('_', ' ').title()}"
200
- if modifier:
201
- direction += f" {modifier}"
202
- if road_name:
203
- direction += f" on {road_name}"
204
-
205
- if distance_m >= 100:
206
- if distance_m >= 1000:
207
- direction += f" for {distance_m/1000:.1f} km"
208
- else:
209
- direction += f" for {distance_m:.0f} meters"
210
 
211
- instruction += direction
212
- instructions.append(instruction)
213
- step_number += 1
214
-
215
- route_summary = f"""ROUTE SUMMARY
216
- Distance: {total_distance_km:.1f} km
217
- Estimated Time: {total_duration_min:.0f} minutes
218
- From: {origin} to {destination}
219
 
220
- TURN-BY-TURN DIRECTIONS:
221
- {chr(10).join(instructions)}
222
 
223
- Total Steps: {len(instructions)}
224
- """
225
-
226
- return route_summary.strip()
227
-
228
- except Exception as e:
229
- return f"Error fetching route: {str(e)}"
230
 
231
- def navigate_with_ai(origin_lat, origin_lon, dest_lat, dest_lon, progress=gr.Progress()):
232
- """Main navigation function for Gradio interface"""
233
-
234
- progress(0, desc="Starting navigation...")
235
-
236
- # Validate inputs
237
- try:
238
- origin_lat = float(origin_lat)
239
- origin_lon = float(origin_lon)
240
- dest_lat = float(dest_lat)
241
- dest_lon = float(dest_lon)
242
- except (ValueError, TypeError):
243
- return "Error: Please enter valid numeric coordinates."
244
-
245
- # Check coordinate ranges
246
- if not (-90 <= origin_lat <= 90) or not (-180 <= origin_lon <= 180):
247
- return "Error: Origin coordinates out of valid range."
248
- if not (-90 <= dest_lat <= 90) or not (-180 <= dest_lon <= 180):
249
- return "Error: Destination coordinates out of valid range."
250
-
251
- # Format coordinates
252
- origin = f"{origin_lon},{origin_lat}"
253
- destination = f"{dest_lon},{dest_lat}"
254
-
255
- progress(0.3, desc="Fetching route data...")
256
-
257
- # Get route from OSRM
258
- raw_route = fetch_route_from_osrm(origin, destination)
259
 
260
- if raw_route.startswith("Error"):
261
- return raw_route
262
 
263
- progress(0.7, desc="Generating AI summary...")
264
 
265
- # Check if API key is available
266
- if not OPENROUTER_API_KEY:
267
- return f"""Warning: No API key configured. Showing raw route data:
268
-
269
- {raw_route}
270
-
271
- To get AI-enhanced summaries, please configure your OpenRouter API key in the Space settings."""
272
 
273
- # Generate AI summary
274
- llm = OpenRouterLLM(api_key=OPENROUTER_API_KEY, model="deepseek/deepseek-v3.1-terminus")
275
 
276
- prompt = f"""
277
- Analyze this route information and create a helpful navigation summary:
278
-
279
- {raw_route}
280
-
281
- Please provide:
282
- 1. A brief overview of the journey
283
- 2. Simplified directions with key landmarks
284
- 3. Any important notes about the route
285
- 4. Travel tips if relevant
286
-
287
- Format your response to be clear and easy to follow.
288
- """
 
 
 
 
289
 
290
- progress(0.9, desc="Finalizing response...")
291
 
292
- ai_summary = llm(prompt, max_tokens=1200, temperature=0.2)
 
 
293
 
294
  progress(1.0, desc="Complete!")
295
 
296
- return ai_summary
297
-
298
- # Predefined location examples
299
- LOCATION_EXAMPLES = {
300
- "Dhaka, Bangladesh": (23.8103, 90.4125),
301
- "Chittagong, Bangladesh": (22.3569, 91.7832),
302
- "London, UK": (51.5074, -0.1278),
303
- "New York, USA": (40.7128, -74.0060),
304
- "Paris, France": (48.8566, 2.3522),
305
- "Tokyo, Japan": (35.6762, 139.6503),
306
- "Sydney, Australia": (-33.8688, 151.2093)
307
- }
308
-
309
- def set_example_location(location_name, is_destination=False):
310
- """Set example location coordinates"""
311
- if location_name in LOCATION_EXAMPLES:
312
- lat, lon = LOCATION_EXAMPLES[location_name]
313
- return lat, lon
314
- return None, None
315
 
316
  # Create Gradio interface
317
  def create_gradio_app():
318
- with gr.Blocks(
319
- title="AI Navigation Agent",
320
- theme=gr.themes.Soft(),
321
- css="""
322
- .main-header {
323
- text-align: center;
324
- background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
325
- color: white;
326
- padding: 20px;
327
- border-radius: 10px;
328
- margin-bottom: 20px;
329
- }
330
- """
331
- ) as app:
332
 
333
  gr.HTML("""
334
- <div class="main-header">
335
- <h1>AI Navigation Agent</h1>
336
- <p>Get AI-powered route planning with DeepSeek V3.1 Terminus</p>
337
  </div>
338
  """)
339
 
340
- with gr.Row():
341
- with gr.Column():
342
- gr.Markdown("### Origin (Starting Point)")
343
-
344
- with gr.Row():
345
- origin_lat = gr.Number(
346
- label="Latitude",
347
- placeholder="e.g., 23.8103",
348
- value=23.8103,
349
- precision=6
350
- )
351
- origin_lon = gr.Number(
352
- label="Longitude",
353
- placeholder="e.g., 90.4125",
354
- value=90.4125,
355
- precision=6
356
- )
357
-
358
- origin_examples = gr.Dropdown(
359
- choices=list(LOCATION_EXAMPLES.keys()),
360
- label="Or choose a preset location",
361
- value=None
362
- )
363
-
364
- with gr.Column():
365
- gr.Markdown("### Destination (End Point)")
366
-
367
- with gr.Row():
368
- dest_lat = gr.Number(
369
- label="Latitude",
370
- placeholder="e.g., 22.3569",
371
- value=22.3569,
372
- precision=6
373
- )
374
- dest_lon = gr.Number(
375
- label="Longitude",
376
- placeholder="e.g., 91.7832",
377
- value=91.7832,
378
- precision=6
379
- )
380
-
381
- dest_examples = gr.Dropdown(
382
- choices=list(LOCATION_EXAMPLES.keys()),
383
- label="Or choose a preset location",
384
- value=None
385
- )
386
-
387
- with gr.Row():
388
- clear_btn = gr.Button("Clear", variant="secondary")
389
- navigate_btn = gr.Button("Get Navigation", variant="primary", size="lg")
390
 
391
- with gr.Row():
392
- output = gr.Textbox(
393
- label="Navigation Result",
394
- lines=20,
395
- placeholder="Your navigation instructions will appear here...",
396
- show_copy_button=True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  )
398
 
399
- # Add a simple chat interface for GAIA testing
400
- with gr.Row():
401
- gr.Markdown("### General AI Assistant (for GAIA evaluation)")
402
 
403
- with gr.Row():
404
- chat_input = gr.Textbox(
405
- label="Ask any question",
406
- placeholder="Type your question here...",
407
  lines=3
408
  )
409
 
410
- with gr.Row():
411
- chat_btn = gr.Button("Ask AI", variant="primary")
 
 
 
 
412
 
413
- with gr.Row():
414
- chat_output = gr.Textbox(
415
- label="AI Response",
416
- lines=10,
417
- placeholder="AI response will appear here...",
418
- show_copy_button=True
419
  )
420
 
421
  gr.Markdown("""
422
  ### How to Use:
423
- 1. **Navigation**: Enter coordinates for route planning
424
- 2. **General Questions**: Use the chat interface below for any questions
425
- 3. **GAIA Testing**: The chat interface is used for GAIA evaluation
426
-
427
- ### Coordinate Format:
428
- - Latitude: -90 to 90 (North/South)
429
- - Longitude: -180 to 180 (East/West)
430
- - Example: Dhaka is at 23.8103, 90.4125
431
  """)
432
-
433
- # Event handlers
434
- def set_origin_example(location):
435
- if location:
436
- lat, lon = set_example_location(location)
437
- return lat, lon
438
- return gr.update(), gr.update()
439
-
440
- def set_dest_example(location):
441
- if location:
442
- lat, lon = set_example_location(location)
443
- return lat, lon
444
- return gr.update(), gr.update()
445
-
446
- def clear_all():
447
- return "", "", "", "", None, None, ""
448
-
449
- # Wire up events
450
- origin_examples.change(
451
- fn=set_origin_example,
452
- inputs=[origin_examples],
453
- outputs=[origin_lat, origin_lon]
454
- )
455
-
456
- dest_examples.change(
457
- fn=set_dest_example,
458
- inputs=[dest_examples],
459
- outputs=[dest_lat, dest_lon]
460
- )
461
-
462
- navigate_btn.click(
463
- fn=navigate_with_ai,
464
- inputs=[origin_lat, origin_lon, dest_lat, dest_lon],
465
- outputs=[output],
466
- show_progress=True
467
- )
468
-
469
- clear_btn.click(
470
- fn=clear_all,
471
- outputs=[origin_lat, origin_lon, dest_lat, dest_lon, origin_examples, dest_examples, output]
472
- )
473
-
474
- # Chat interface for GAIA
475
- chat_btn.click(
476
- fn=run_agent,
477
- inputs=[chat_input],
478
- outputs=[chat_output]
479
- )
480
 
481
  return app
482
 
@@ -484,17 +320,7 @@ def create_gradio_app():
484
  if __name__ == "__main__":
485
  app = create_gradio_app()
486
 
487
- # Check if running on Hugging Face Spaces
488
  if os.getenv("SPACE_ID"):
489
- # Running on HF Spaces
490
- app.launch(
491
- server_name="0.0.0.0",
492
- server_port=7860,
493
- show_api=False
494
- )
495
  else:
496
- # Running locally
497
- app.launch(
498
- share=True,
499
- show_api=False
500
- )
 
2
  import requests
3
  import json
4
  import gradio as gr
5
+ from typing import Dict, List, Any
6
+ import time
7
 
8
+ # Your OpenRouter API key
9
  OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY") or os.getenv("my_key")
10
+ GAIA_API_BASE = "https://huggingface.co/api/gaia" # Replace with actual GAIA API URL
11
 
12
  class OpenRouterLLM:
13
  def __init__(self, api_key: str, model: str = "deepseek/deepseek-v3.1-terminus"):
 
15
  self.model = model
16
  self.base_url = "https://openrouter.ai/api/v1/chat/completions"
17
 
18
+ def __call__(self, prompt: str, max_tokens: int = 1000, temperature: float = 0.1) -> str:
19
+ """Make API call to OpenRouter"""
20
 
21
  if not self.api_key or not self.api_key.startswith('sk-or-v1-'):
22
+ return "Error: Invalid OpenRouter API key"
23
 
24
  headers = {
25
  "Authorization": f"Bearer {self.api_key}",
26
  "Content-Type": "application/json",
27
  "HTTP-Referer": "https://huggingface.co/spaces/Mehedi2/Gaia-Test-Agent",
28
+ "X-Title": "GAIA Test Agent"
29
  }
30
 
31
  payload = {
 
33
  "messages": [
34
  {
35
  "role": "system",
36
+ "content": """You are a helpful AI assistant designed to answer questions accurately and concisely.
37
+ For GAIA evaluation, provide EXACT answers without explanation unless asked.
38
+ - For math questions, give just the number
39
+ - For yes/no questions, give just "Yes" or "No"
40
+ - For factual questions, give just the fact
41
+ - Be precise and direct."""
42
  },
43
  {
44
  "role": "user",
 
58
  timeout=30
59
  )
60
 
61
+ if response.status_code != 200:
62
+ return f"API Error: {response.status_code}"
 
 
 
 
 
 
63
 
64
  result = response.json()
65
 
66
  if "choices" in result and len(result["choices"]) > 0:
67
  return result["choices"][0]["message"]["content"].strip()
68
  else:
69
+ return "Error: No response content received"
70
+
 
 
 
 
71
  except Exception as e:
72
  return f"Error: {str(e)}"
73
 
74
class GAIAAgent:
    """Question-answering agent used for the GAIA benchmark.

    Wraps an ``OpenRouterLLM`` client to answer questions, and talks to the
    service rooted at ``GAIA_API_BASE`` to fetch questions and submit answers.
    """

    # Lead-ins the model tends to put in front of the real answer.
    # They are matched case-insensitively at the start of the response.
    _ANSWER_PREFIXES = (
        "Answer:", "The answer is:", "Response:", "Result:",
        "Final answer:", "Solution:", "A:", "Answer is:",
    )

    def __init__(self, api_key: str):
        """Create the agent and its LLM client.

        Args:
            api_key: OpenRouter API key handed to ``OpenRouterLLM``.
        """
        self.llm = OpenRouterLLM(api_key=api_key)
        self.api_key = api_key

    def run_agent(self, prompt: str) -> str:
        """Answer a single question and return the cleaned-up answer text.

        Args:
            prompt: The question text.

        Returns:
            The model's answer after ``clean_answer``; on any exception the
            string ``"Error: <message>"`` is returned instead of raising.
        """
        try:
            # Wrap the question with instructions that push the model toward
            # a bare answer without explanation.
            enhanced_prompt = f"""
            Question: {prompt}

            Analyze this question carefully and provide the exact answer. Do not include explanations, reasoning, or extra text unless specifically asked for reasoning.

            Examples of good responses:
            - Math question "What is 15 + 27?" → Answer: "42"
            - Yes/No question "Is Paris the capital of France?" → Answer: "Yes"
            - Factual question "What is the capital of Japan?" → Answer: "Tokyo"

            Your answer:"""

            response = self.llm(enhanced_prompt, max_tokens=500, temperature=0.1)
            return self.clean_answer(response)

        except Exception as e:
            # Boundary handler: the UI callbacks expect a string, never a raise.
            return f"Error: {str(e)}"

    def clean_answer(self, response: str) -> str:
        """Strip boilerplate lead-ins and wrapping quotes from a model response.

        Prefixes are removed repeatedly until none matches, so stacked
        lead-ins such as ``"The answer is: Answer: 42"`` reduce to ``"42"``.

        Args:
            response: Raw text returned by the LLM.

        Returns:
            The answer with surrounding whitespace, known prefixes and one
            pair of enclosing double quotes removed.
        """
        answer = response.strip()

        stripped_one = True
        while stripped_one:
            stripped_one = False
            lowered = answer.lower()
            for prefix in self._ANSWER_PREFIXES:
                if lowered.startswith(prefix.lower()):
                    answer = answer[len(prefix):].strip()
                    stripped_one = True
                    break

        # Only unwrap a real pair of quotes; a lone '"' is left untouched.
        if len(answer) >= 2 and answer.startswith('"') and answer.endswith('"'):
            answer = answer[1:-1]

        return answer

    def get_questions(self) -> List[Dict]:
        """Fetch the question list from ``{GAIA_API_BASE}/questions``.

        Returns:
            The decoded JSON list, or ``[]`` when the request fails, the
            status is not 200, the body is not JSON, or it is not a list.
        """
        try:
            response = requests.get(f"{GAIA_API_BASE}/questions", timeout=30)
            if response.status_code != 200:
                return []
            data = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or undecodable body: best-effort empty result.
            return []
        return data if isinstance(data, list) else []

    def get_random_question(self) -> Dict:
        """Fetch one question from ``{GAIA_API_BASE}/random-question``.

        Returns:
            The decoded JSON object, or ``{}`` when the request fails, the
            status is not 200, the body is not JSON, or it is not a dict.
        """
        try:
            response = requests.get(f"{GAIA_API_BASE}/random-question", timeout=30)
            if response.status_code != 200:
                return {}
            data = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or undecodable body: best-effort empty result.
            return {}
        return data if isinstance(data, dict) else {}

    def submit_answers(self, username: str, agent_code: str, answers: List[Dict]) -> Dict:
        """POST the collected answers to ``{GAIA_API_BASE}/submit``.

        Args:
            username: Value sent as ``"username"`` in the payload.
            agent_code: Value sent as ``"agent_code"`` in the payload.
            answers: Value sent as ``"answers"`` in the payload.

        Returns:
            The decoded JSON response on HTTP 200, otherwise a dict with a
            single ``"error"`` key describing what went wrong.
        """
        try:
            payload = {
                "username": username,
                "agent_code": agent_code,
                "answers": answers,
            }

            response = requests.post(
                f"{GAIA_API_BASE}/submit",
                json=payload,
                timeout=60,
            )

            if response.status_code != 200:
                return {"error": f"Submission failed: {response.status_code}"}

            result = response.json()
            if not isinstance(result, dict):
                # Callers look up keys on the result; keep that contract.
                return {"error": "Submission error: unexpected response format"}
            return result

        except Exception as e:
            return {"error": f"Submission error: {str(e)}"}
 
 
 
 
 
 
171
 
172
# Single agent instance shared by every callback in this module; it is built
# once at import time from the configured OpenRouter key.
agent = GAIAAgent(api_key=OPENROUTER_API_KEY)


def run_agent(prompt: str) -> str:
    """Answer ``prompt`` by delegating to the shared module-level ``agent``.

    Args:
        prompt: The question text.

    Returns:
        Whatever ``agent.run_agent`` returns for the prompt.
    """
    answer = agent.run_agent(prompt)
    return answer
 
 
 
 
178
 
179
def test_single_question():
    """Fetch one random question, answer it, and format both for display.

    Returns:
        ``"Question: ...\\nAnswer: ..."`` on success, or
        ``"Failed to get question"`` when no usable question text came back.
    """
    question = agent.get_random_question()

    # Treat a payload without question text like a failed fetch, rather than
    # sending an empty prompt to the LLM.
    question_text = question.get("Question", "") if isinstance(question, dict) else ""
    if not question_text:
        return "Failed to get question"

    answer = run_agent(question_text)
    return f"Question: {question_text}\nAnswer: {answer}"
186
+
187
def run_full_evaluation(username: str, progress=gr.Progress()):
    """Answer every fetched question and submit the answers for scoring.

    Args:
        username: Hugging Face username; sent with the submission and used
            to build the ``agent_code`` URL.
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        A human-readable status string: a validation or failure message, or
        a summary with the reported score and the number of answers sent.
    """
    # Reject None and whitespace-only input as well as the empty string.
    username = (username or "").strip()
    if not username:
        return "Please provide your Hugging Face username"

    if not OPENROUTER_API_KEY:
        return "Please configure your OpenRouter API key"

    progress(0.1, desc="Getting questions...")

    questions = agent.get_questions()
    if not questions:
        return "Failed to retrieve questions from GAIA API"

    total = len(questions)
    progress(0.2, desc=f"Processing {total} questions...")

    answers = []
    for i, question in enumerate(questions):
        # Answering spans the 0.2-0.9 portion of the progress bar.
        progress(0.2 + (0.7 * i / total), desc=f"Processing question {i+1}/{total}")

        # Skip malformed entries instead of crashing on ``.get``.
        if not isinstance(question, dict):
            continue

        task_id = question.get("task_id", "")
        question_text = question.get("Question", "")
        if not question_text:
            continue

        answers.append({
            "task_id": task_id,
            "submitted_answer": run_agent(question_text),
        })

        # Small delay to avoid rate limiting
        time.sleep(0.5)

    # Nothing was answered, so there is nothing meaningful to submit.
    if not answers:
        return "No answerable questions were returned by the GAIA API"

    progress(0.9, desc="Submitting answers...")

    agent_code = f"https://huggingface.co/spaces/{username}/Gaia-Test-Agent/tree/main"
    result = agent.submit_answers(username, agent_code, answers)

    progress(1.0, desc="Complete!")

    if "error" in result:
        return f"Submission failed: {result['error']}"

    score = result.get("score", 0)
    return f"Evaluation complete!\nScore: {score}%\nAnswers submitted: {len(answers)}\nCheck the leaderboard for your ranking!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  # Create Gradio interface
237
def create_gradio_app():
    """Assemble the Gradio Blocks UI and return it without launching.

    The layout is a banner, three tabs (random-question test, full
    evaluation, manual question entry) and a usage note. Each tab's button
    is wired to the matching module-level callback.
    """
    soft_theme = gr.themes.Soft()

    with gr.Blocks(title="GAIA Test Agent", theme=soft_theme) as demo:

        gr.HTML("""
        <div style="text-align: center; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
            <h1>GAIA Test Agent</h1>
            <p>AI Agent for GAIA Benchmark Evaluation</p>
        </div>
        """)

        # Tab 1: answer one random question fetched from the API.
        with gr.Tab("Single Question Test"):
            random_test_button = gr.Button("Test Random Question", variant="primary")
            random_test_result = gr.Textbox(
                label="Test Result",
                lines=10,
                placeholder="Test results will appear here...",
            )

            random_test_button.click(
                fn=test_single_question,
                outputs=[random_test_result],
            )

        # Tab 2: run every question and submit the answers.
        with gr.Tab("Full Evaluation"):
            gr.Markdown("### Run Full GAIA Evaluation")

            username_box = gr.Textbox(
                label="Hugging Face Username",
                placeholder="Enter your HF username",
                info="This will be used for the leaderboard",
            )

            evaluation_button = gr.Button("Run Full Evaluation", variant="primary")
            evaluation_result = gr.Textbox(
                label="Evaluation Results",
                lines=15,
                placeholder="Evaluation results will appear here...",
            )

            evaluation_button.click(
                fn=run_full_evaluation,
                inputs=[username_box],
                outputs=[evaluation_result],
                show_progress=True,
            )

        # Tab 3: free-form question typed by the user.
        with gr.Tab("Manual Testing"):
            gr.Markdown("### Test Individual Questions")

            question_box = gr.Textbox(
                label="Enter Question",
                placeholder="Type a question to test...",
                lines=3,
            )

            answer_button = gr.Button("Get Answer", variant="primary")
            answer_box = gr.Textbox(
                label="Answer",
                lines=5,
                placeholder="Answer will appear here...",
            )

            answer_button.click(
                fn=run_agent,
                inputs=[question_box],
                outputs=[answer_box],
            )

        gr.Markdown("""
        ### How to Use:
        1. **Single Question Test**: Test your agent with one random question from GAIA
        2. **Full Evaluation**: Run the complete evaluation and submit to leaderboard
        3. **Manual Testing**: Test your agent with custom questions

        ### Requirements:
        - Set your OpenRouter API key in Space secrets as `OPENROUTER_API_KEY`
        - Keep your Space public for leaderboard verification
        - Your HF username will appear on the leaderboard
        """)

    return demo
318
 
 
320
if __name__ == "__main__":
    app = create_gradio_app()

    # The API page is hidden in both modes; only the network options differ.
    launch_options = {"show_api": False}
    if os.getenv("SPACE_ID"):
        # SPACE_ID is set: bind to all interfaces on port 7860.
        launch_options.update(server_name="0.0.0.0", server_port=7860)
    else:
        # SPACE_ID is not set: request a public share link instead.
        launch_options["share"] = True

    app.launch(**launch_options)