bhotta committed on
Commit
4054356
·
verified ·
1 Parent(s): 4bd93f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +642 -423
app.py CHANGED
@@ -1,506 +1,725 @@
1
  import os
2
- import gradio as gr
 
 
 
 
3
  import requests
4
  import pandas as pd
5
- from smolagents import CodeAgent, OpenAIServerModel, tool
6
  from openai import OpenAI
7
- import base64
8
 
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Custom Tools ---
12
 
13
- @tool
14
- def get_youtube_transcript(video_url: str) -> str:
15
- """Fetch the transcript/captions of a YouTube video.
16
-
17
- Args:
18
- video_url: The full YouTube video URL e.g. https://www.youtube.com/watch?v=XXXXX
19
- """
20
- try:
21
- from youtube_transcript_api import YouTubeTranscriptApi
22
 
23
- if "v=" in video_url:
24
- video_id = video_url.split("v=")[-1].split("&")[0]
25
- elif "youtu.be/" in video_url:
26
- video_id = video_url.split("youtu.be/")[-1].split("?")[0]
27
- else:
28
- return "Could not extract video ID."
29
 
30
- ytt_api = YouTubeTranscriptApi()
31
- fetched = ytt_api.fetch(video_id)
32
- transcript_text = " ".join([t.text for t in fetched])
33
- return transcript_text[:8000]
 
 
34
 
35
- except Exception as e:
36
- # Fallback: try fetching via youtubetotranscript
37
- try:
38
- if "v=" in video_url:
39
- video_id = video_url.split("v=")[-1].split("&")[0]
40
- elif "youtu.be/" in video_url:
41
- video_id = video_url.split("youtu.be/")[-1].split("?")[0]
42
- else:
43
- video_id = video_url
44
 
45
- resp = requests.get(
46
- f"https://youtubetotranscript.com/transcript?v={video_id}",
47
- headers={"User-Agent": "Mozilla/5.0"},
48
- timeout=15
49
- )
50
- if resp.status_code == 200:
51
- import re
52
- text = re.sub(r'<[^>]+>', ' ', resp.text)
53
- text = re.sub(r'\s+', ' ', text).strip()
54
- return text[:6000]
55
- except Exception:
56
- pass
57
- return f"Transcript fetch failed: {e}"
58
 
 
 
 
59
 
60
- @tool
61
- def wikipedia_fetch_page(page_title: str) -> str:
62
- """Fetch the full content of a specific Wikipedia page by its exact title.
63
-
64
- Args:
65
- page_title: The exact Wikipedia page title, e.g. 'Mercedes Sosa' or 'Mercedes Sosa discography'.
66
- """
67
- import time
68
- time.sleep(1)
69
 
70
- headers = {
71
- "User-Agent": "GaiaResearchBot/1.0 (huggingface educational project)"
72
- }
73
 
74
- try:
75
- params = {
76
- "action": "query",
77
- "titles": page_title,
78
- "prop": "extracts",
79
- "explaintext": True,
80
- "exsectionformat": "plain",
81
- "format": "json",
82
- "redirects": 1,
83
- }
84
- resp = requests.get(
85
- "https://en.wikipedia.org/w/api.php",
86
- params=params,
87
- headers=headers,
88
- timeout=20
89
- )
90
- resp.raise_for_status()
91
- data = resp.json()
92
- pages = data.get("query", {}).get("pages", {})
93
- for pid, page in pages.items():
94
- if pid == "-1":
95
- return f"Page '{page_title}' not found on Wikipedia."
96
- return page.get("extract", "No content.")[:10000]
97
- except Exception:
98
- pass
99
-
100
- try:
101
- url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{page_title.replace(' ', '_')}"
102
- resp = requests.get(url, headers=headers, timeout=15)
103
- data = resp.json()
104
- return data.get("extract", "No summary found.")
105
- except Exception as e:
106
- return f"Failed to fetch Wikipedia page: {e}"
107
-
108
-
109
- @tool
110
- def web_search(query: str) -> str:
111
- """Search the web using a query string. Returns search results as text.
112
- Use this for general web searches. For Wikipedia, prefer wikipedia_fetch_page instead.
113
-
114
- Args:
115
- query: The search query string. Be very specific, include full names to avoid ambiguity.
116
- """
117
- import time
118
 
119
- for attempt in range(3):
120
- try:
121
- from duckduckgo_search import DDGS
122
- with DDGS() as ddgs:
123
- results = list(ddgs.text(query, max_results=8))
124
- if results:
125
- return "\n\n".join(
126
- f"Title: {r['title']}\nURL: {r['href']}\nSnippet: {r['body']}"
127
- for r in results[:6]
128
- )
129
- except Exception as e:
130
- if attempt < 2:
131
- time.sleep(3)
132
- continue
133
- return f"Search unavailable after retries: {e}"
134
-
135
- return "Search unavailable. Try wikipedia_fetch_page or visit_webpage instead."
136
-
137
-
138
- @tool
139
- def visit_webpage(url: str) -> str:
140
- """Fetch the text content of a webpage. Use this to read full page content from a URL.
141
- Tries direct fetch then Wayback Machine as fallback.
142
-
143
- Args:
144
- url: The full URL of the webpage to fetch.
145
- """
146
- import re
147
- import time
148
-
149
- # For Wikipedia URLs, use the API instead
150
- if "wikipedia.org/wiki/" in url:
151
- page_title = url.split("/wiki/")[-1].replace("_", " ")
152
- # Remove URL fragments
153
- page_title = page_title.split("#")[0]
154
- return wikipedia_fetch_page(page_title)
155
-
156
- # Sites known to block scrapers β€” go straight to Wayback Machine
157
- blocked = [
158
- "genius.com", "rateyourmusic.com", "discogs.com",
159
- "allmusic.com", "albumoftheyear.org", "famousfix.com",
160
- "spotify.com", "apple.com/music"
161
- ]
162
- use_wayback = any(b in url for b in blocked)
163
 
164
- headers = {
165
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
166
- "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
167
- "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
168
- "Accept-Language": "en-US,en;q=0.5",
169
- }
170
 
171
- if not use_wayback:
 
172
  try:
173
- resp = requests.get(url, headers=headers, timeout=20)
174
- if resp.status_code == 200:
175
- text = re.sub(r'<[^>]+>', ' ', resp.text)
176
- text = re.sub(r'\s+', ' ', text).strip()
177
- return text[:8000]
178
  except Exception:
179
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
- # Wayback Machine fallback
182
- try:
183
- wb_api = f"https://archive.org/wayback/available?url={url}&timestamp=20221201"
184
- wb_resp = requests.get(wb_api, timeout=10)
185
- snapshot = wb_resp.json().get("archived_snapshots", {}).get("closest", {})
186
- snapshot_url = snapshot.get("url")
187
- if snapshot_url:
188
- time.sleep(1)
189
- snap_resp = requests.get(snapshot_url, headers=headers, timeout=20)
190
- if snap_resp.status_code == 200:
191
- text = re.sub(r'<[^>]+>', ' ', snap_resp.text)
192
- text = re.sub(r'\s+', ' ', text).strip()
193
- return f"[Via Wayback Machine]\n{text[:8000]}"
194
- except Exception as e:
195
- return f"Could not fetch {url}: {e}"
196
-
197
- return f"Could not fetch {url}"
198
-
199
-
200
- @tool
201
- def analyze_image_from_url(image_url: str, question: str) -> str:
202
- """Analyze an image from a URL using GPT-4o vision and answer a question about it.
203
- Only use this for direct image URLs ending in .png, .jpg, .jpeg, .gif, .webp etc.
204
- Do NOT use this for YouTube video URLs.
205
-
206
- Args:
207
- image_url: The direct URL to the image file to analyze.
208
- question: The question to answer about the image content.
209
- """
210
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
211
- try:
212
- response = client.chat.completions.create(
213
  model="gpt-4o",
214
  messages=[{
215
  "role": "user",
216
  "content": [
217
- {"type": "image_url", "image_url": {"url": image_url}},
218
- {"type": "text", "text": question}
219
- ]
 
 
220
  }],
221
- max_tokens=500
 
222
  )
223
- return response.choices[0].message.content
224
- except Exception as e:
225
- return f"Image analysis failed: {e}"
226
-
227
-
228
- @tool
229
- def analyze_task_file(task_id: str, question: str) -> str:
230
- """Download and analyze a file attached to a GAIA task.
231
- ALWAYS call this first for every task. Returns 'NO_FILE_ATTACHED' if no file exists.
232
- If it returns 'NO_FILE_ATTACHED', then use web_search or wikipedia_fetch_page instead.
233
-
234
- Args:
235
- task_id: The GAIA task ID used to fetch the associated file.
236
- question: The question to answer based on the file content.
237
- """
238
- api_url = DEFAULT_API_URL
239
- file_url = f"{api_url}/files/{task_id}"
240
- client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
241
-
242
- try:
243
- resp = requests.get(file_url, timeout=30)
244
- if resp.status_code == 404:
245
- return "NO_FILE_ATTACHED"
246
- resp.raise_for_status()
247
-
248
- content_type = resp.headers.get("content-type", "").lower()
249
- file_bytes = resp.content
250
-
251
- # Image files
252
- if any(x in content_type for x in ["image", "png", "jpeg", "jpg", "gif", "webp"]):
253
- b64 = base64.b64encode(file_bytes).decode()
254
- data_url = f"data:{content_type.split(';')[0]};base64,{b64}"
255
- response = client.chat.completions.create(
256
- model="gpt-4o",
257
- messages=[{"role": "user", "content": [
258
- {"type": "image_url", "image_url": {"url": data_url}},
259
- {"type": "text", "text": question}
260
- ]}],
261
- max_tokens=1000
262
- )
263
- return response.choices[0].message.content
264
-
265
- # Text / CSV / JSON / HTML
266
- elif any(x in content_type for x in ["text", "csv", "json", "html", "xml"]):
267
- text_content = file_bytes.decode("utf-8", errors="ignore")[:12000]
268
- response = client.chat.completions.create(
269
- model="gpt-4o",
270
- messages=[{"role": "user", "content": f"File content:\n{text_content}\n\nQuestion: {question}"}],
271
- max_tokens=1000
272
- )
273
- return response.choices[0].message.content
274
-
275
- # Audio files
276
- elif any(x in content_type for x in ["audio", "mp3", "wav", "m4a", "ogg", "mpeg"]):
277
- import tempfile
278
- ext = content_type.split("/")[-1].split(";")[0]
279
- if ext not in ["mp3", "wav", "m4a", "ogg", "webm", "flac"]:
280
- ext = "mp3"
281
- with tempfile.NamedTemporaryFile(suffix=f".{ext}", delete=False) as f:
282
- f.write(file_bytes)
283
  fname = f.name
284
- with open(fname, "rb") as audio_file:
285
- transcript = client.audio.transcriptions.create(
286
- model="whisper-1", file=audio_file
287
- )
288
- os.unlink(fname)
289
- # Now answer the question using the transcript
290
- response = client.chat.completions.create(
291
- model="gpt-4o",
292
- messages=[{"role": "user", "content": f"Audio transcript:\n{transcript.text}\n\nQuestion: {question}"}],
293
- max_tokens=500
294
  )
295
- return response.choices[0].message.content
 
 
 
 
 
 
 
 
296
 
297
- # Excel / spreadsheet
298
- elif any(x in content_type for x in ["excel", "spreadsheet", "xlsx", "xls",
299
- "openxmlformats", "ms-excel"]):
300
- import tempfile
 
 
301
  import io
302
- with tempfile.NamedTemporaryFile(suffix=".xlsx", delete=False) as f:
303
- f.write(file_bytes)
304
- fname = f.name
305
- try:
306
- # Actually read the Excel file
307
- xl = pd.read_excel(fname, sheet_name=None)
308
- all_text = []
309
- for sheet_name, df in xl.items():
310
- all_text.append(f"Sheet: {sheet_name}\n{df.to_string(index=False)}")
311
- combined = "\n\n".join(all_text)[:12000]
312
- os.unlink(fname)
313
- except Exception as ex:
314
- os.unlink(fname)
315
- return f"Could not parse Excel file: {ex}"
316
-
317
- response = client.chat.completions.create(
318
  model="gpt-4o",
319
- messages=[{"role": "user", "content": f"Spreadsheet data:\n{combined}\n\nQuestion: {question}"}],
320
- max_tokens=1000
 
 
 
 
 
 
 
 
321
  )
322
- return response.choices[0].message.content
 
 
323
 
324
- # PDF
325
- elif "pdf" in content_type:
326
- import tempfile
327
- with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
328
- f.write(file_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
329
  fname = f.name
330
- try:
331
- import PyPDF2
332
- with open(fname, "rb") as pdf_file:
333
- reader = PyPDF2.PdfReader(pdf_file)
334
- text = "\n".join(page.extract_text() or "" for page in reader.pages)
335
- os.unlink(fname)
336
- except Exception:
337
- # Fallback: send as base64 image of first page
338
- os.unlink(fname)
339
- b64 = base64.b64encode(file_bytes).decode()
340
- response = client.chat.completions.create(
341
- model="gpt-4o",
342
- messages=[{"role": "user", "content": f"I have a PDF file (base64, {len(b64)} chars). Question: {question}. Please note I cannot display the PDF directly."}],
343
- max_tokens=500
344
  )
345
- return response.choices[0].message.content
346
-
347
- response = client.chat.completions.create(
348
- model="gpt-4o",
349
- messages=[{"role": "user", "content": f"PDF content:\n{text[:12000]}\n\nQuestion: {question}"}],
350
- max_tokens=1000
351
- )
352
- return response.choices[0].message.content
353
 
354
- else:
355
- # Unknown type β€” try decoding as text
356
- try:
357
- text_content = file_bytes.decode("utf-8", errors="ignore")[:8000]
358
- if text_content.strip():
359
- response = client.chat.completions.create(
360
- model="gpt-4o",
361
- messages=[{"role": "user", "content": f"File content:\n{text_content}\n\nQuestion: {question}"}],
362
- max_tokens=500
363
- )
364
- return response.choices[0].message.content
365
- except Exception:
366
- pass
367
- return f"File downloaded ({len(file_bytes)} bytes, type: {content_type}) but format not supported."
368
-
369
- except requests.exceptions.HTTPError as e:
370
- if e.response is not None and e.response.status_code == 404:
371
- return "NO_FILE_ATTACHED"
372
- return "NO_FILE_ATTACHED"
373
- except Exception as e:
374
- return "NO_FILE_ATTACHED"
375
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
 
377
- # --- Agent ---
 
 
 
 
 
 
 
378
 
379
- class BasicAgent:
380
- def __init__(self):
381
- api_key = os.getenv("OPENAI_API_KEY")
382
- if not api_key:
383
- raise ValueError("OPENAI_API_KEY is missing!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
 
385
- self.model = OpenAIServerModel(
386
- model_id="gpt-4o",
387
- api_key=api_key
388
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
389
 
390
- self.agent = CodeAgent(
391
- tools=[
392
- web_search,
393
- visit_webpage,
394
- wikipedia_fetch_page,
395
- analyze_image_from_url,
396
- analyze_task_file,
397
- get_youtube_transcript,
398
- ],
399
- model=self.model,
400
- add_base_tools=True,
401
- max_steps=15,
402
- )
403
- print("βœ… OpenAI-powered Agent initialized.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
404
 
405
  def __call__(self, question: str, task_id: str = "") -> str:
406
- print(f"DEBUG: Agent received question: {question[:100]}...")
407
-
408
- prompt = (
409
- f"You are a precise research agent solving GAIA benchmark tasks.\n"
410
- f"Task ID: {task_id}\n"
411
- f"Task: {question}\n\n"
412
- "Instructions:\n"
413
- "- ALWAYS call analyze_task_file first for every task before doing anything else.\n"
414
- " * If it returns a real answer (not 'NO_FILE_ATTACHED'), use that answer.\n"
415
- " * If it returns 'NO_FILE_ATTACHED', proceed with web_search or wikipedia_fetch_page.\n"
416
- "- For Wikipedia lookups, ALWAYS use wikipedia_fetch_page with the EXACT article title.\n"
417
- " GOOD: wikipedia_fetch_page('Mercedes Sosa')\n"
418
- " GOOD: wikipedia_fetch_page('Giganotosaurus')\n"
419
- " BAD: web_search('Mercedes Sosa wikipedia')\n"
420
- "- When web searching, use full specific names/phrases to avoid ambiguity.\n"
421
- "- For YouTube video links, ALWAYS call get_youtube_transcript(video_url) FIRST.\n"
422
- " If transcript fetch fails, do web_search with the video ID or title.\n"
423
- "- To read a full webpage, use visit_webpage(url).\n"
424
- "- For chess positions in images, analyze_task_file will return the move directly.\n"
425
- "- Provide ONLY the final direct answer. No explanations, no punctuation unless needed.\n"
426
- " Examples: '42', 'FunkMonk', 'right', 'Louvrier', 'b,e'\n"
427
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
  try:
429
- result = self.agent.run(prompt)
430
- return str(result).strip()
431
- except Exception as e:
432
- print(f"❌ Error: {e}")
433
- return "Error finding answer."
 
 
 
 
 
 
434
 
435
 
436
- # --- Gradio + Submission ---
437
 
438
  def run_and_submit_all(profile: gr.OAuthProfile | None):
439
- if profile:
440
- username = f"{profile.username}"
441
- print(f"Logged in as: {username}")
442
- else:
443
- return "Please Login to Hugging Face first.", None
444
 
445
- space_id = os.getenv("SPACE_ID")
 
446
  api_url = DEFAULT_API_URL
447
- questions_url = f"{api_url}/questions"
448
- submit_url = f"{api_url}/submit"
449
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
450
 
451
  try:
452
  agent = BasicAgent()
453
  except Exception as e:
454
- return f"Initialization Failed: {e}", None
455
 
456
  try:
457
- response = requests.get(questions_url, timeout=15)
458
- response.raise_for_status()
459
- questions_data = response.json()
460
  except Exception as e:
461
  return f"Error fetching questions: {e}", None
462
 
463
- results_log = []
464
- answers_payload = []
465
 
466
  for item in questions_data:
467
  task_id = item.get("task_id", "")
468
  question_text = item.get("question", "")
469
  try:
470
- submitted_answer = agent(question_text, task_id=task_id)
471
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
472
- results_log.append({"Task ID": task_id, "Question": question_text[:80], "Answer": submitted_answer})
473
  except Exception as e:
474
- results_log.append({"Task ID": task_id, "Question": question_text[:80], "Answer": f"Error: {e}"})
 
475
 
476
- submission_data = {
477
- "username": username.strip(),
478
- "agent_code": agent_code,
479
- "answers": answers_payload
480
- }
 
481
 
482
  try:
483
- response = requests.post(submit_url, json=submission_data, timeout=60)
484
- response.raise_for_status()
485
- res = response.json()
 
 
 
 
 
 
 
 
486
  status = (
487
- f"Submission Successful!\n"
488
- f"Score: {res.get('score')}% ({res.get('correct_count')}/{res.get('total_attempted')})\n"
 
489
  f"Message: {res.get('message')}"
490
  )
491
- return status, pd.DataFrame(results_log)
492
  except Exception as e:
493
- return f"Submission Failed: {e}", pd.DataFrame(results_log)
 
 
494
 
495
 
496
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
497
  gr.Markdown("# πŸ€– GAIA Agent Evaluation")
498
- gr.Markdown("Click Login, then Run to evaluate your agent on the GAIA dataset.")
 
 
 
499
  gr.LoginButton()
500
  run_button = gr.Button("πŸš€ Run Evaluation & Submit", variant="primary")
501
- status_output = gr.Textbox(label="Status", lines=4)
502
- results_table = gr.DataFrame(label="Agent Performance Log")
503
- run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
 
504
 
505
  if __name__ == "__main__":
506
- demo.launch(ssr_mode=False)
 
1
  import os
2
+ import re
3
+ import json
4
+ import base64
5
+ import subprocess
6
+ import tempfile
7
  import requests
8
  import pandas as pd
9
+ import gradio as gr
10
  from openai import OpenAI
 
11
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
 
14
 
15
+ # ── helpers ───────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
16
 
17
def _strip_html(html: str) -> str:
    """Return the visible text of an HTML document as one space-joined string.

    Text inside ``script``, ``style``, ``nav``, ``footer`` and ``head``
    elements is dropped.  Each remaining text node is stripped of
    surrounding whitespace; empty nodes are discarded.

    Args:
        html: HTML markup (an empty or ``None`` value yields ``""``).

    Returns:
        The kept text nodes joined by single spaces.
    """
    from html.parser import HTMLParser

    skip_tags = frozenset({"script", "style", "nav", "footer", "head"})

    class _TextExtractor(HTMLParser):
        def __init__(self):
            super().__init__()
            self.parts = []
            # Depth counter instead of a boolean: with a flag, the first
            # closing skip-tag re-enabled output even when still nested
            # inside another skipped element (e.g. <nav><style>..</style>x</nav>).
            self._skip_depth = 0

        def handle_starttag(self, tag, attrs):
            if tag in skip_tags:
                self._skip_depth += 1

        def handle_endtag(self, tag):
            # Never go below zero on a stray closing tag.
            if tag in skip_tags and self._skip_depth:
                self._skip_depth -= 1

        def handle_data(self, data):
            text = data.strip()
            if text and not self._skip_depth:
                self.parts.append(text)

    parser = _TextExtractor()
    parser.feed(html or "")
    # close() flushes any text the parser is still buffering at end of input.
    parser.close()
    return " ".join(parser.parts)
 
 
 
 
 
 
42
 
 
 
 
43
 
44
+ # ── agent ──────────────────────────────────────────────────────���──────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
+ class BasicAgent:
47
def __init__(self):
    """Create the OpenAI client and record the scoring API base URL.

    Reads the API key from the ``OPENAI_API_KEY`` environment variable.

    Raises:
        ValueError: if the variable is unset, empty, or only whitespace.
    """
    # Strip so a key pasted with a trailing newline/space is still usable
    # and a whitespace-only value is reported as missing.
    api_key = (os.getenv("OPENAI_API_KEY") or "").strip()
    if not api_key:
        raise ValueError("OPENAI_API_KEY missing.")
    self.client = OpenAI(api_key=api_key)
    self.api_url = DEFAULT_API_URL
    print("✅ Agent initialised.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ # ── raw file fetch ────────────────────────────────────────────────────────
 
 
 
 
 
56
 
57
def _fetch_file(self, task_id: str):
    """Download the file attached to a task from the scoring API.

    Args:
        task_id: identifier appended to ``{self.api_url}/files/``.

    Returns:
        ``(content_bytes, content_type)`` when the server answers HTTP 200
        with a non-empty body; otherwise ``(None, "")``.
    """
    try:
        r = requests.get(f"{self.api_url}/files/{task_id}", timeout=15)
        if r.status_code == 200 and r.content:
            return r.content, r.headers.get("Content-Type", "")
    except requests.RequestException as e:
        # Still best-effort (callers treat a failure as "no file"), but log
        # it so network problems are distinguishable from a missing file.
        print(f"File fetch failed for task {task_id}: {e}")
    return None, ""
66
+
67
+ # ── tools (called by the loop) ────────────────────────────────────────────
68
+
69
def tool_check_file(self, task_id: str) -> str:
    """Tell the model whether a file exists and what type it is.

    Args:
        task_id: task whose attachment is looked up via ``self._fetch_file``.

    Returns:
        ``"NO_FILE"`` when nothing could be downloaded, otherwise a
        ``FILE_EXISTS ...`` line with the MIME type (parameters removed,
        lower-cased, ``unknown`` when the server sent none), the size in
        bytes, and a hint naming the tool to use for each file kind.
    """
    fb, ct = self._fetch_file(task_id)
    if not fb:
        return "NO_FILE"
    # An empty Content-Type used to produce "type=" with no value.
    ct_clean = ct.split(";")[0].strip().lower() or "unknown"
    return (
        f"FILE_EXISTS type={ct_clean} size={len(fb)}_bytes. "
        f"Use the appropriate tool to read it: "
        f"image→analyse_image, python→run_python_file, "
        f"excel/xlsx→read_excel_file, audio→transcribe_audio, "
        f"text/pdf→read_text_file."
    )
82
 
83
def tool_analyse_image(self, task_id: str, question: str) -> str:
    """Pass the task's image to GPT-4o vision and return its answer.

    Args:
        task_id: task whose attachment is downloaded via ``self._fetch_file``.
        question: text prompt sent alongside the image.

    Returns:
        The model's reply, ``"No response."`` if the reply is empty, or a
        short diagnostic string when there is no file, the file is not an
        image, or the API call fails.
    """
    fb, ct = self._fetch_file(task_id)
    if not fb:
        return "No image found."
    # MIME types are case-insensitive; normalise before testing.
    ct_clean = ct.split(";")[0].strip().lower()
    if "image" not in ct_clean:
        return f"File is not an image (type={ct_clean})."
    b64 = base64.b64encode(fb).decode()
    try:
        resp = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "user",
                "content": [
                    {"type": "image_url",
                     "image_url": {"url": f"data:{ct_clean};base64,{b64}",
                                   "detail": "high"}},
                    {"type": "text", "text": question},
                ],
            }],
            max_tokens=800,
            temperature=0,
        )
        return resp.choices[0].message.content or "No response."
    except Exception as e:
        # Tool boundary: report the failure as text, like the sibling tools,
        # instead of letting it propagate into the calling loop.
        return f"Image analysis error: {e}"
107
+
108
def tool_run_python_file(self, task_id: str) -> str:
    """Download the task's Python file, execute it, and return its output.

    Args:
        task_id: task whose attachment is downloaded via ``self._fetch_file``.

    Returns:
        ``"STDOUT:\\n..."`` when the script printed to stdout, else
        ``"STDERR:\\n..."`` when it wrote to stderr, else ``"No output."``.
        ``"No file found."`` when nothing could be downloaded and
        ``"Execution error: ..."`` on any failure (including the 30 s timeout).
    """
    import sys

    fb, _ = self._fetch_file(task_id)
    if not fb:
        return "No file found."
    code = fb.decode("utf-8", errors="ignore")
    fname = None
    try:
        # Explicit UTF-8: the locale default can fail on non-ASCII source.
        with tempfile.NamedTemporaryFile(suffix=".py", delete=False,
                                         mode="w", encoding="utf-8") as f:
            f.write(code)
            fname = f.name
        # NOTE(security): this runs downloaded code with the privileges of
        # this process and no sandbox. Only use with trusted task files.
        result = subprocess.run(
            # Same interpreter as this process; "python3" may be missing
            # from PATH or point at a different environment.
            [sys.executable or "python3", fname],
            capture_output=True, text=True, timeout=30,
        )
        out = result.stdout.strip()
        err = result.stderr.strip()
        if out:
            return f"STDOUT:\n{out}"
        if err:
            return f"STDERR:\n{err}"
        return "No output."
    except Exception as e:
        return f"Execution error: {e}"
    finally:
        # delete=False leaves the file behind; remove it on every path.
        if fname:
            try:
                os.unlink(fname)
            except OSError:
                pass
132
 
133
def tool_read_excel_file(self, task_id: str, question: str) -> str:
    """Load the task's spreadsheet with pandas and let GPT-4o answer about it.

    A Content-Type containing ``csv`` or ``text`` is parsed as CSV;
    anything else is parsed as an Excel workbook, reading every worksheet.

    Args:
        task_id: task whose attachment is downloaded via ``self._fetch_file``.
        question: question the model should answer from the data.

    Returns:
        The model's reply, ``"No answer."`` if the reply is empty,
        ``"No file found."`` when nothing could be downloaded, or
        ``"Excel read error: ..."`` on any parsing/API failure.
    """
    import io

    max_chars = 12000  # cap on the table text placed in the prompt
    fb, ct = self._fetch_file(task_id)
    if not fb:
        return "No file found."
    try:
        ct_clean = ct.split(";")[0].strip().lower()
        if "csv" in ct_clean or "text" in ct_clean:
            sheets = {"csv": pd.read_csv(io.BytesIO(fb))}
        else:
            # sheet_name=None returns {sheet name: DataFrame} for all sheets;
            # the default reads only the first one.
            sheets = pd.read_excel(io.BytesIO(fb), sheet_name=None)
        # Render every row. to_string(max_rows=60) would silently replace
        # the middle of the table with "..." (head and tail only), which
        # made the old "first 60 rows" prompt wrong and hid data.
        rendered = "\n\n".join(
            f"Sheet: {name} ({len(df)} rows)\n{df.to_string(index=False)}"
            for name, df in sheets.items()
        )
        note = ""
        if len(rendered) > max_chars:
            rendered = rendered[:max_chars]
            note = f" (truncated to the first {max_chars} characters)"
        resp = self.client.chat.completions.create(
            model="gpt-4o",
            messages=[{
                "role": "user",
                "content": (
                    f"Here is the spreadsheet data{note}:\n\n{rendered}\n\n"
                    f"Question: {question}\n"
                    f"Answer with ONLY the final value, no explanation."
                ),
            }],
            max_tokens=200,
            temperature=0,
        )
        return resp.choices[0].message.content or "No answer."
    except Exception as e:
        return f"Excel read error: {e}"
163
 
164
def tool_transcribe_audio(self, task_id: str) -> str:
    """Download the task's audio file and transcribe it with Whisper.

    Args:
        task_id: task whose attachment is downloaded via ``self._fetch_file``.

    Returns:
        The transcript text, ``"No file found."`` when nothing could be
        downloaded, or ``"Transcription error: ..."`` on failure.
    """
    fb, ct = self._fetch_file(task_id)
    if not fb:
        return "No file found."
    fname = None
    try:
        # The temp file gets an extension derived from the MIME type,
        # defaulting to .mp3 for unlisted types.
        ct_clean = ct.split(";")[0].strip().lower()
        ext_map = {
            "audio/mpeg": ".mp3", "audio/mp3": ".mp3",
            "audio/wav": ".wav", "audio/x-wav": ".wav",
            "audio/ogg": ".ogg", "audio/flac": ".flac",
            "audio/m4a": ".m4a", "audio/mp4": ".mp4",
        }
        ext = ext_map.get(ct_clean, ".mp3")
        with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
            f.write(fb)
            fname = f.name
        with open(fname, "rb") as audio_f:
            transcript = self.client.audio.transcriptions.create(
                model="whisper-1", file=audio_f
            )
        return transcript.text
    except Exception as e:
        return f"Transcription error: {e}"
    finally:
        # delete=False leaves the file behind; remove it on every path.
        if fname:
            try:
                os.unlink(fname)
            except OSError:
                pass
 
 
 
 
 
189
 
190
def tool_read_text_file(self, task_id: str) -> str:
    """Return up to 6000 characters of the task's text or PDF attachment.

    PDFs (detected by Content-Type or by the ``%PDF-`` file signature) are
    converted with ``pdfminer``; everything else is decoded as UTF-8 with
    undecodable bytes dropped.

    Args:
        task_id: task whose attachment is downloaded via ``self._fetch_file``.

    Returns:
        The extracted text, ``"No file found."`` when nothing could be
        downloaded, or ``"Read error: ..."`` on failure.
    """
    fb, ct = self._fetch_file(task_id)
    if not fb:
        return "No file found."
    try:
        ct_clean = ct.split(";")[0].strip().lower()
        # Also sniff the signature: a PDF may be served with a generic type.
        is_pdf = "pdf" in ct_clean or fb[:1024].lstrip().startswith(b"%PDF-")
        if is_pdf:
            try:
                import io
                import pdfminer.high_level
            except ImportError:
                # Decoding raw PDF bytes as UTF-8 yields binary noise that
                # the model could mistake for content; say so instead.
                return ("Read error: PDF text extraction requires the "
                        "pdfminer.six package, which is not installed.")
            text = pdfminer.high_level.extract_text(io.BytesIO(fb))
            return text[:6000]
        return fb.decode("utf-8", errors="ignore")[:6000]
    except Exception as e:
        return f"Read error: {e}"
 
 
209
 
210
def tool_search_web(self, query: str) -> str:
    """Search DuckDuckGo's HTML endpoint and return the top result snippets.

    Args:
        query: search query string.

    Returns:
        Up to six snippets separated by blank lines, ``"No results."`` when
        none were found, or ``"Search error: ..."`` on failure.
    """
    from html.parser import HTMLParser

    # Elements that never have a closing tag; they must not affect nesting.
    void_tags = frozenset({"br", "img", "wbr", "hr", "input"})

    class _SnippetParser(HTMLParser):
        """Collect the text of every element whose class has ``result__snippet``."""

        def __init__(self):
            super().__init__()
            self.results = []
            self._depth = 0   # > 0 while inside a snippet element
            self._buf = []

        def handle_starttag(self, tag, attrs):
            if self._depth:
                if tag not in void_tags:
                    self._depth += 1
                return
            # A valueless ``class`` attribute is reported as None.
            if "result__snippet" in (dict(attrs).get("class") or ""):
                self._depth = 1
                self._buf = []

        def handle_data(self, data):
            if self._depth:
                self._buf.append(data)

        def handle_endtag(self, tag):
            if not self._depth or tag in void_tags:
                return
            # Only the tag that closes the snippet element itself ends the
            # snippet; ending on any closing tag cut the text at the first
            # inline </b> highlight.
            self._depth -= 1
            if self._depth == 0:
                text = "".join(self._buf).strip()
                if text:
                    self.results.append(text)

    try:
        hdrs = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/124.0 Safari/537.36"
            )
        }
        r = requests.get(
            "https://html.duckduckgo.com/html/",
            params={"q": query}, headers=hdrs, timeout=12,
        )
        # Surface HTTP failures as an error rather than as "No results.".
        r.raise_for_status()
        p = _SnippetParser()
        p.feed(r.text)
        p.close()
        return "\n\n".join(p.results[:6]) or "No results."
    except Exception as e:
        return f"Search error: {e}"
255
 
256
def tool_fetch_webpage(self, url: str) -> str:
    """Download a URL and return up to 8000 characters of its visible text.

    Args:
        url: address to fetch with an HTTP GET.

    Returns:
        The page text as produced by ``_strip_html``, or
        ``"Fetch error: ..."`` when the request or parsing fails.
    """
    request_headers = {"User-Agent": "Mozilla/5.0 Chrome/124.0"}
    try:
        response = requests.get(url, headers=request_headers, timeout=18)
        response.raise_for_status()
        page_text = _strip_html(response.text)
    except Exception as exc:
        return f"Fetch error: {exc}"
    return page_text[:8000]
264
 
265
def tool_fetch_wikipedia(self, title: str) -> str:
    """Fetch the text of an English Wikipedia article by title.

    Tries the full plain-text extract from ``w/api.php`` first and falls
    back to the REST summary (lead section only) when that yields nothing.

    Args:
        title: article title; redirects are followed by the API.

    Returns:
        Up to 7000 characters of article text, the REST summary extract,
        ``"Not found."`` when neither source has text, or
        ``"Wikipedia error: ..."`` on failure.
    """
    # Wikimedia asks API clients to send a descriptive User-Agent.
    hdrs = {
        "User-Agent": "GaiaAgent/1.0 (Hugging Face agents-course project; python-requests)"
    }
    try:
        # Full article first: the REST summary is only the lead paragraph,
        # and returning it whenever it succeeded meant the full-text path
        # was effectively never used.
        r = requests.get(
            "https://en.wikipedia.org/w/api.php",
            params={
                "action": "query", "prop": "extracts", "explaintext": 1,
                "titles": title, "format": "json", "redirects": 1,
            },
            headers=hdrs,
            timeout=12,
        )
        if r.status_code == 200:
            pages = r.json().get("query", {}).get("pages", {})
            for page in pages.values():
                text = (page.get("extract") or "").strip()
                if text:
                    return text[:7000]
        # Fallback: REST summary. safe="" so a "/" inside a title (e.g.
        # "AC/DC") is percent-encoded instead of splitting the URL path.
        slug = requests.utils.quote(title.replace(" ", "_"), safe="")
        r2 = requests.get(
            f"https://en.wikipedia.org/api/rest_v1/page/summary/{slug}",
            headers=hdrs,
            timeout=12,
        )
        if r2.status_code == 200:
            extract = r2.json().get("extract")
            if extract:
                return extract
    except Exception as e:
        return f"Wikipedia error: {e}"
    return "Not found."
293
 
294
def tool_youtube_transcript(self, video_url: str) -> str:
    """Fetch up to 6000 characters of a YouTube video's transcript.

    Accepts ``watch?v=``, ``youtu.be/``, ``/shorts/`` and ``/embed/`` URLs.

    Args:
        video_url: URL of the video.

    Returns:
        The transcript text, ``"Bad URL."`` when no video id can be found,
        a ``"BLOCKED: ..."`` hint when YouTube refused the request, or
        ``"Transcript error: ..."`` for any other failure.
    """
    m = re.search(
        r"(?:[?&]v=|youtu\.be/|/shorts/|/embed/)([A-Za-z0-9_-]+)",
        video_url or "",
    )
    if not m:
        return "Bad URL."
    video_id = m.group(1)
    try:
        from youtube_transcript_api import YouTubeTranscriptApi

        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Legacy static API: list of {"text": ...} dicts.
            entries = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [e["text"] for e in entries]
        else:
            # Current API: instance .fetch() yielding objects with .text.
            texts = [s.text for s in YouTubeTranscriptApi().fetch(video_id)]
        return " ".join(texts)[:6000]
    except Exception as e:
        err = str(e)
        # Match whole words only: the old substring test for "ip" also hit
        # "transcript" and "description", so almost every failure was
        # reported as BLOCKED.
        blocked = (
            type(e).__name__ in ("RequestBlocked", "IpBlocked")
            or re.search(r"\b(?:ip|cloud)\b|blocked", err, re.IGNORECASE)
        )
        if blocked:
            return (
                "BLOCKED: YouTube blocks cloud IPs. "
                "Use search_web to find transcript/description of this video. "
                "Search for the video title + key phrase from the question."
            )
        return f"Transcript error: {err}"
312
+
313
+ # ── tool dispatch ─────────────────────────────────────────────────────────
314
+
315
+ TOOLS = [
316
+ {
317
+ "type": "function",
318
+ "function": {
319
+ "name": "check_file",
320
+ "description": (
321
+ "ALWAYS call this first. Checks if a file is attached to the task. "
322
+ "Returns 'NO_FILE' or info about the file type and how to read it."
323
+ ),
324
+ "parameters": {
325
+ "type": "object",
326
+ "properties": {"task_id": {"type": "string"}},
327
+ "required": ["task_id"],
328
+ },
329
+ },
330
+ },
331
+ {
332
+ "type": "function",
333
+ "function": {
334
+ "name": "analyse_image",
335
+ "description": (
336
+ "Analyse an image file attached to the task using GPT-4o vision. "
337
+ "Use for chess boards, diagrams, photos, screenshots."
338
+ ),
339
+ "parameters": {
340
+ "type": "object",
341
+ "properties": {
342
+ "task_id": {"type": "string"},
343
+ "question": {"type": "string",
344
+ "description": "What to find/answer from the image."},
345
+ },
346
+ "required": ["task_id", "question"],
347
+ },
348
+ },
349
+ },
350
+ {
351
+ "type": "function",
352
+ "function": {
353
+ "name": "run_python_file",
354
+ "description": (
355
+ "Execute the Python file attached to the task and return its output. "
356
+ "Use when the task asks for the output of Python code."
357
+ ),
358
+ "parameters": {
359
+ "type": "object",
360
+ "properties": {"task_id": {"type": "string"}},
361
+ "required": ["task_id"],
362
+ },
363
+ },
364
+ },
365
+ {
366
+ "type": "function",
367
+ "function": {
368
+ "name": "read_excel_file",
369
+ "description": (
370
+ "Read an Excel or CSV file attached to the task and answer "
371
+ "a question about its data."
372
+ ),
373
+ "parameters": {
374
+ "type": "object",
375
+ "properties": {
376
+ "task_id": {"type": "string"},
377
+ "question": {"type": "string"},
378
+ },
379
+ "required": ["task_id", "question"],
380
+ },
381
+ },
382
+ },
383
+ {
384
+ "type": "function",
385
+ "function": {
386
+ "name": "transcribe_audio",
387
+ "description": (
388
+ "Transcribe an audio file attached to the task using Whisper. "
389
+ "Use for voice memos, recordings, audio questions."
390
+ ),
391
+ "parameters": {
392
+ "type": "object",
393
+ "properties": {"task_id": {"type": "string"}},
394
+ "required": ["task_id"],
395
+ },
396
+ },
397
+ },
398
+ {
399
+ "type": "function",
400
+ "function": {
401
+ "name": "read_text_file",
402
+ "description": "Read a text or PDF file attached to the task.",
403
+ "parameters": {
404
+ "type": "object",
405
+ "properties": {"task_id": {"type": "string"}},
406
+ "required": ["task_id"],
407
+ },
408
+ },
409
+ },
410
+ {
411
+ "type": "function",
412
+ "function": {
413
+ "name": "youtube_transcript",
414
+ "description": (
415
+ "Fetch YouTube video transcript. If cloud-blocked, "
416
+ "returns instructions to use search_web instead."
417
+ ),
418
+ "parameters": {
419
+ "type": "object",
420
+ "properties": {"video_url": {"type": "string"}},
421
+ "required": ["video_url"],
422
+ },
423
+ },
424
+ },
425
+ {
426
+ "type": "function",
427
+ "function": {
428
+ "name": "search_web",
429
+ "description": "Search the web via DuckDuckGo. Returns top snippets.",
430
+ "parameters": {
431
+ "type": "object",
432
+ "properties": {"query": {"type": "string"}},
433
+ "required": ["query"],
434
+ },
435
+ },
436
+ },
437
+ {
438
+ "type": "function",
439
+ "function": {
440
+ "name": "fetch_webpage",
441
+ "description": "Fetch and read the full text content of any URL.",
442
+ "parameters": {
443
+ "type": "object",
444
+ "properties": {"url": {"type": "string"}},
445
+ "required": ["url"],
446
+ },
447
+ },
448
+ },
449
+ {
450
+ "type": "function",
451
+ "function": {
452
+ "name": "fetch_wikipedia",
453
+ "description": (
454
+ "Fetch a Wikipedia article by exact title. "
455
+ "Always use this instead of fetch_webpage for Wikipedia."
456
+ ),
457
+ "parameters": {
458
+ "type": "object",
459
+ "properties": {"title": {"type": "string"}},
460
+ "required": ["title"],
461
+ },
462
+ },
463
+ },
464
+ ]
465
 
466
+ def _dispatch(self, fn: str, args: dict, task_id: str, question: str) -> str:
467
+ if fn == "check_file":
468
+ return self.tool_check_file(args.get("task_id", task_id))
469
+ if fn == "analyse_image":
470
+ return self.tool_analyse_image(
471
+ args.get("task_id", task_id), args.get("question", question))
472
+ if fn == "run_python_file":
473
+ return self.tool_run_python_file(args.get("task_id", task_id))
474
+ if fn == "read_excel_file":
475
+ return self.tool_read_excel_file(
476
+ args.get("task_id", task_id), args.get("question", question))
477
+ if fn == "transcribe_audio":
478
+ return self.tool_transcribe_audio(args.get("task_id", task_id))
479
+ if fn == "read_text_file":
480
+ return self.tool_read_text_file(args.get("task_id", task_id))
481
+ if fn == "youtube_transcript":
482
+ return self.tool_youtube_transcript(args.get("video_url", ""))
483
+ if fn == "search_web":
484
+ return self.tool_search_web(args.get("query", ""))
485
+ if fn == "fetch_webpage":
486
+ return self.tool_fetch_webpage(args.get("url", ""))
487
+ if fn == "fetch_wikipedia":
488
+ return self.tool_fetch_wikipedia(args.get("title", ""))
489
+ return "Unknown tool."
490
+
491
+ # ── system prompt ─────────────────────────────────────────────────────────
492
+
493
# System prompt used as the first message of every conversation in __call__.
# It fixes the tool-usage workflow (check for an attachment first, then gather
# information, then retry) and the terse answer format expected by the scorer.
# The dash, bullet and arrow characters below are the real Unicode characters;
# they must not be stored as mis-decoded byte sequences.
SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.

MANDATORY WORKFLOW — follow every step, no exceptions:

STEP 1 — Always call check_file(task_id) first, regardless of the question.
  • If NO_FILE → go to STEP 2.
  • If FILE_EXISTS image → call analyse_image(task_id, full_question).
  • If FILE_EXISTS python → call run_python_file(task_id). The output IS the answer.
  • If FILE_EXISTS excel/xlsx/csv → call read_excel_file(task_id, question).
  • If FILE_EXISTS audio → call transcribe_audio(task_id), then answer from transcript.
  • If FILE_EXISTS text/pdf → call read_text_file(task_id), then answer from content.
  CRITICAL: NEVER return "NO_FILE" or any tool status string as your final answer.

STEP 2 — Gather information using tools.
  • YouTube URL in question → call youtube_transcript(url) first.
    If BLOCKED → use search_web("video title + key phrase") to find the answer.
  • Wikipedia question → call fetch_wikipedia("Exact Article Title").
    For discography → look at Studio albums table. Count ONLY solo studio albums.
    Do NOT count: collaborations, live albums, compilations, EPs.
  • LibreTexts 1.E Exercises → fetch_webpage with EXACT URL:
    https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
  • Wikipedia Featured Articles → fetch_webpage:
    https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_promoted_in_2016
    Then search for the specific article's nomination page.
  • Sports stats → search_web("player name stat year site:baseball-reference.com")
    then fetch_webpage the result URL for exact numbers.
  • For ANY other factual question → search_web, then fetch_webpage top result.

STEP 3 — If first search fails, try different search terms. Try at least 2-3
different approaches before giving up. Never say "I was unable to find."

STEP 4 — Answer format:
  • Return ONLY the final value. No explanation. No "The answer is".
  • Numbers: just the number (e.g. "3" not "3 albums").
  • Names: just the name.
  • Yes/No: just "yes" or "no".
  • Lists: comma-separated values."""
530
+
531
+ # ── main call ─────────────────────────────────────────────────────────────
532
 
533
  def __call__(self, question: str, task_id: str = "") -> str:
534
+ print(f"β–Ά Task {task_id[:8]}: {question[:80]}")
535
+
536
+ # Pre-attach image to messages if task has an image file
537
+ fb, ct = self._fetch_file(task_id)
538
+ ct_clean = (ct or "").split(";")[0].strip().lower()
539
+
540
+ user_content = []
541
+ if fb and "image" in ct_clean:
542
+ b64 = base64.b64encode(fb).decode()
543
+ user_content.append({
544
+ "type": "image_url",
545
+ "image_url": {"url": f"data:{ct_clean};base64,{b64}",
546
+ "detail": "high"},
547
+ })
548
+
549
+ user_content.append({
550
+ "type": "text",
551
+ "text": f"task_id: {task_id}\n\nTask: {question}",
552
+ })
553
+
554
+ messages = [
555
+ {"role": "system", "content": self.SYSTEM},
556
+ {"role": "user", "content": user_content},
557
+ ]
558
+
559
+ for _round in range(10):
560
+ try:
561
+ resp = self.client.chat.completions.create(
562
+ model="gpt-4o",
563
+ messages=messages,
564
+ tools=self.TOOLS,
565
+ tool_choice="auto",
566
+ temperature=0,
567
+ max_tokens=1500,
568
+ )
569
+ except Exception as e:
570
+ print(f" OpenAI error: {e}")
571
+ return "Error."
572
+
573
+ msg = resp.choices[0].message
574
+
575
+ # No tool calls β†’ we have the answer
576
+ if not msg.tool_calls:
577
+ answer = (msg.content or "").strip()
578
+ # Reject bad answers
579
+ bad = ("no_file", "file_exists", "i was unable",
580
+ "i couldn't", "i can't access", "please provide",
581
+ "you might want", "i'm unable")
582
+ if any(b in answer.lower() for b in bad):
583
+ # Force a retry with a harder nudge
584
+ messages.append({
585
+ "role": "assistant",
586
+ "content": answer,
587
+ })
588
+ messages.append({
589
+ "role": "user",
590
+ "content": (
591
+ "That answer is not acceptable. "
592
+ "Use your tools to find the real answer. "
593
+ "Try search_web or fetch_wikipedia. "
594
+ "Return ONLY the final value."
595
+ ),
596
+ })
597
+ continue
598
+ return answer
599
+
600
+ # Append assistant turn
601
+ messages.append({
602
+ "role": "assistant",
603
+ "content": msg.content,
604
+ "tool_calls": [
605
+ {
606
+ "id": tc.id,
607
+ "type": "function",
608
+ "function": {
609
+ "name": tc.function.name,
610
+ "arguments": tc.function.arguments,
611
+ },
612
+ }
613
+ for tc in msg.tool_calls
614
+ ],
615
+ })
616
+
617
+ # Execute tools
618
+ for tc in msg.tool_calls:
619
+ fn = tc.function.name
620
+ try:
621
+ args = json.loads(tc.function.arguments)
622
+ except Exception:
623
+ args = {}
624
+ result = self._dispatch(fn, args, task_id, question)
625
+ print(f" {fn}({list(args.values())[:1]}) β†’ {str(result)[:80]}")
626
+ messages.append({
627
+ "role": "tool",
628
+ "tool_call_id": tc.id,
629
+ "content": result or "Empty result.",
630
+ })
631
+
632
+ # Force final answer
633
  try:
634
+ messages.append({
635
+ "role": "user",
636
+ "content": "Final answer only – just the value, no explanation.",
637
+ })
638
+ resp = self.client.chat.completions.create(
639
+ model="gpt-4o", messages=messages,
640
+ temperature=0, max_tokens=100,
641
+ )
642
+ return (resp.choices[0].message.content or "").strip()
643
+ except Exception:
644
+ return "Error."
645
 
646
 
647
+ # ── Gradio UI ─────────────────────────────────────────────────────────────────
648
 
649
  def run_and_submit_all(profile: gr.OAuthProfile | None):
650
+ if not profile:
651
+ return "Please login to Hugging Face first.", None
 
 
 
652
 
653
+ username = profile.username
654
+ space_id = os.getenv("SPACE_ID", "")
655
  api_url = DEFAULT_API_URL
 
 
 
656
 
657
  try:
658
  agent = BasicAgent()
659
  except Exception as e:
660
+ return f"Init failed: {e}", None
661
 
662
  try:
663
+ qs = requests.get(f"{api_url}/questions", timeout=15)
664
+ qs.raise_for_status()
665
+ questions_data = qs.json()
666
  except Exception as e:
667
  return f"Error fetching questions: {e}", None
668
 
669
+ results_log, answers_payload = [], []
 
670
 
671
  for item in questions_data:
672
  task_id = item.get("task_id", "")
673
  question_text = item.get("question", "")
674
  try:
675
+ answer = agent(question_text, task_id=task_id)
 
 
676
  except Exception as e:
677
+ answer = f"Error: {e}"
678
+ print(f" β†’ Answer: {answer[:60]}")
679
 
680
+ answers_payload.append({"task_id": task_id, "submitted_answer": answer})
681
+ results_log.append({
682
+ "Task ID": task_id,
683
+ "Question": question_text[:120],
684
+ "Answer": answer,
685
+ })
686
 
687
  try:
688
+ r = requests.post(
689
+ f"{api_url}/submit",
690
+ json={
691
+ "username": username.strip(),
692
+ "agent_code": f"https://huggingface.co/spaces/{space_id}/tree/main",
693
+ "answers": answers_payload,
694
+ },
695
+ timeout=60,
696
+ )
697
+ r.raise_for_status()
698
+ res = r.json()
699
  status = (
700
+ f"βœ… Submitted!\n"
701
+ f"Score: {res.get('score')}% "
702
+ f"({res.get('correct_count')}/{res.get('total_attempted')})\n"
703
  f"Message: {res.get('message')}"
704
  )
 
705
  except Exception as e:
706
+ status = f"Submission failed: {e}"
707
+
708
+ return status, pd.DataFrame(results_log)
709
 
710
 
711
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
712
  gr.Markdown("# πŸ€– GAIA Agent Evaluation")
713
+ gr.Markdown(
714
+ "Handles: images Β· Python execution Β· Excel Β· audio transcription Β· "
715
+ "Wikipedia Β· YouTube Β· web search"
716
+ )
717
  gr.LoginButton()
718
  run_button = gr.Button("πŸš€ Run Evaluation & Submit", variant="primary")
719
+ status_output = gr.Textbox(label="Status", lines=5)
720
+ results_table = gr.DataFrame(label="Results")
721
+ run_button.click(fn=run_and_submit_all,
722
+ outputs=[status_output, results_table])
723
 
724
  if __name__ == "__main__":
725
+ demo.launch()