bhotta committed on
Commit
1b067ff
·
verified ·
1 Parent(s): f369d85

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +168 -181
app.py CHANGED
@@ -7,10 +7,13 @@ import tempfile
7
  import requests
8
  import pandas as pd
9
  import gradio as gr
10
- from openai import OpenAI
11
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
 
 
 
14
 
15
  # ── helpers ───────────────────────────────────────────────────────────────────
16
 
@@ -45,12 +48,17 @@ def _strip_html(html: str) -> str:
45
 
46
  class BasicAgent:
47
  def __init__(self):
48
- api_key = os.getenv("OPENAI_API_KEY")
49
- if not api_key:
50
- raise ValueError("OPENAI_API_KEY missing.")
51
- self.client = OpenAI(api_key=api_key)
 
 
 
 
 
52
  self.api_url = DEFAULT_API_URL
53
- print("βœ… Agent initialised.")
54
 
55
  # ── raw file fetch ────────────────────────────────────────────────────────
56
 
@@ -64,110 +72,106 @@ class BasicAgent:
64
  pass
65
  return None, ""
66
 
67
- # ── tools (called by the loop) ────────────────────────────────────────────
68
 
69
  def tool_check_file(self, task_id: str) -> str:
70
- """Tell the model whether a file exists and what type it is."""
71
  fb, ct = self._fetch_file(task_id)
72
  if not fb:
73
  return "NO_FILE"
74
  ct_clean = ct.split(";")[0].strip().lower()
75
  return (
76
  f"FILE_EXISTS type={ct_clean} size={len(fb)}_bytes. "
77
- f"Use the appropriate tool to read it: "
78
- f"image→analyse_image, python→run_python_file, "
79
  f"excel/xlsxβ†’read_excel_file, audioβ†’transcribe_audio, "
80
  f"text/pdfβ†’read_text_file."
81
  )
82
 
83
  def tool_analyse_image(self, task_id: str, question: str) -> str:
84
- """Pass the image to GPT-4o vision and return its answer."""
85
  fb, ct = self._fetch_file(task_id)
86
  if not fb:
87
  return "No image found."
88
- ct_clean = ct.split(";")[0].strip()
89
  if "image" not in ct_clean:
90
  return f"File is not an image (type={ct_clean})."
91
  b64 = base64.b64encode(fb).decode()
92
- resp = self.client.chat.completions.create(
93
- model="gpt-4o",
94
- messages=[{
95
- "role": "user",
96
- "content": [
97
- {"type": "image_url",
98
- "image_url": {"url": f"data:{ct_clean};base64,{b64}",
99
- "detail": "high"}},
100
- {"type": "text", "text": question},
101
- ],
102
- }],
103
- max_tokens=800,
104
- temperature=0,
105
  )
106
- return resp.choices[0].message.content or "No response."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
  def tool_run_python_file(self, task_id: str) -> str:
109
- """Download the Python file, execute it, return stdout/stderr."""
110
- fb, ct = self._fetch_file(task_id)
111
  if not fb:
112
  return "No file found."
113
  code = fb.decode("utf-8", errors="ignore")
114
  try:
115
- with tempfile.NamedTemporaryFile(suffix=".py", delete=False,
116
- mode="w") as f:
 
117
  f.write(code)
118
  fname = f.name
119
  result = subprocess.run(
120
  ["python3", fname],
121
- capture_output=True, text=True, timeout=30
122
  )
123
  out = result.stdout.strip()
124
  err = result.stderr.strip()
125
- if out:
126
- return f"STDOUT:\n{out}"
127
- if err:
128
- return f"STDERR:\n{err}"
129
- return "No output."
130
  except Exception as e:
131
  return f"Execution error: {e}"
132
 
133
  def tool_read_excel_file(self, task_id: str, question: str) -> str:
134
- """Download xlsx/csv, load with pandas, let GPT-4o answer about it."""
135
  fb, ct = self._fetch_file(task_id)
136
  if not fb:
137
  return "No file found."
138
  try:
139
  import io
140
  ct_clean = ct.split(";")[0].strip().lower()
141
- if "csv" in ct_clean or "text" in ct_clean:
142
- df = pd.read_csv(io.BytesIO(fb))
143
- else:
144
- df = pd.read_excel(io.BytesIO(fb))
145
- preview = df.to_string(max_rows=60, max_cols=20)
146
- # Ask GPT-4o to answer the question from the data
147
- resp = self.client.chat.completions.create(
148
- model="gpt-4o",
149
- messages=[{
150
- "role": "user",
151
- "content": (
152
- f"Here is a spreadsheet (first 60 rows):\n\n{preview}\n\n"
153
- f"Question: {question}\n"
154
- f"Answer with ONLY the final value, no explanation."
155
- ),
156
- }],
157
- max_tokens=200,
158
- temperature=0,
159
  )
160
- return resp.choices[0].message.content or "No answer."
161
  except Exception as e:
162
  return f"Excel read error: {e}"
163
 
164
  def tool_transcribe_audio(self, task_id: str) -> str:
165
- """Download audio and transcribe with Whisper."""
166
  fb, ct = self._fetch_file(task_id)
167
  if not fb:
168
  return "No file found."
169
  try:
170
- # Guess extension
171
  ct_clean = ct.split(";")[0].strip().lower()
172
  ext_map = {
173
  "audio/mpeg": ".mp3", "audio/mp3": ".mp3",
@@ -179,28 +183,28 @@ class BasicAgent:
179
  with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
180
  f.write(fb)
181
  fname = f.name
 
 
 
 
 
182
  with open(fname, "rb") as audio_f:
183
- transcript = self.client.audio.transcriptions.create(
184
- model="whisper-1", file=audio_f
185
- )
186
- return transcript.text
187
  except Exception as e:
188
  return f"Transcription error: {e}"
189
 
190
  def tool_read_text_file(self, task_id: str) -> str:
191
- """Read text/PDF file content."""
192
  fb, ct = self._fetch_file(task_id)
193
  if not fb:
194
  return "No file found."
195
  try:
196
  ct_clean = ct.split(";")[0].strip().lower()
197
  if "pdf" in ct_clean:
198
- # Try pdfminer or just decode bytes
199
  try:
200
  import pdfminer.high_level
201
  import io
202
- text = pdfminer.high_level.extract_text(io.BytesIO(fb))
203
- return text[:6000]
204
  except ImportError:
205
  pass
206
  return fb.decode("utf-8", errors="ignore")[:6000]
@@ -208,13 +212,11 @@ class BasicAgent:
208
  return f"Read error: {e}"
209
 
210
  def tool_search_web(self, query: str) -> str:
211
- """DuckDuckGo HTML search – stable from cloud IPs."""
212
  try:
213
  hdrs = {
214
  "User-Agent": (
215
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
216
- "AppleWebKit/537.36 (KHTML, like Gecko) "
217
- "Chrome/124.0 Safari/537.36"
218
  )
219
  }
220
  r = requests.get(
@@ -263,7 +265,6 @@ class BasicAgent:
263
  return f"Fetch error: {e}"
264
 
265
  def tool_fetch_wikipedia(self, title: str) -> str:
266
- """Use Wikipedia REST API (no 403 issues)."""
267
  try:
268
  slug = requests.utils.quote(title.replace(" ", "_"))
269
  r = requests.get(
@@ -271,9 +272,7 @@ class BasicAgent:
271
  timeout=12,
272
  )
273
  if r.status_code == 200:
274
- data = r.json()
275
- return data.get("extract", "Not found.")
276
- # Fallback: full extract via w/api.php
277
  r2 = requests.get(
278
  "https://en.wikipedia.org/w/api.php",
279
  params={
@@ -305,8 +304,7 @@ class BasicAgent:
305
  ("blocked", "ip", "cloud", "requestblocked", "ipblocked")):
306
  return (
307
  "BLOCKED: YouTube blocks cloud IPs. "
308
- "Use search_web to find transcript/description of this video. "
309
- "Search for the video title + key phrase from the question."
310
  )
311
  return f"Transcript error: {err}"
312
 
@@ -319,7 +317,7 @@ class BasicAgent:
319
  "name": "check_file",
320
  "description": (
321
  "ALWAYS call this first. Checks if a file is attached to the task. "
322
- "Returns 'NO_FILE' or info about the file type and how to read it."
323
  ),
324
  "parameters": {
325
  "type": "object",
@@ -333,15 +331,17 @@ class BasicAgent:
333
  "function": {
334
  "name": "analyse_image",
335
  "description": (
336
- "Analyse an image file attached to the task using GPT-4o vision. "
337
  "Use for chess boards, diagrams, photos, screenshots."
338
  ),
339
  "parameters": {
340
  "type": "object",
341
  "properties": {
342
  "task_id": {"type": "string"},
343
- "question": {"type": "string",
344
- "description": "What to find/answer from the image."},
 
 
345
  },
346
  "required": ["task_id", "question"],
347
  },
@@ -353,7 +353,7 @@ class BasicAgent:
353
  "name": "run_python_file",
354
  "description": (
355
  "Execute the Python file attached to the task and return its output. "
356
- "Use when the task asks for the output of Python code."
357
  ),
358
  "parameters": {
359
  "type": "object",
@@ -366,10 +366,7 @@ class BasicAgent:
366
  "type": "function",
367
  "function": {
368
  "name": "read_excel_file",
369
- "description": (
370
- "Read an Excel or CSV file attached to the task and answer "
371
- "a question about its data."
372
- ),
373
  "parameters": {
374
  "type": "object",
375
  "properties": {
@@ -385,7 +382,7 @@ class BasicAgent:
385
  "function": {
386
  "name": "transcribe_audio",
387
  "description": (
388
- "Transcribe an audio file attached to the task using Whisper. "
389
  "Use for voice memos, recordings, audio questions."
390
  ),
391
  "parameters": {
@@ -412,8 +409,8 @@ class BasicAgent:
412
  "function": {
413
  "name": "youtube_transcript",
414
  "description": (
415
- "Fetch YouTube video transcript. If cloud-blocked, "
416
- "returns instructions to use search_web instead."
417
  ),
418
  "parameters": {
419
  "type": "object",
@@ -426,7 +423,7 @@ class BasicAgent:
426
  "type": "function",
427
  "function": {
428
  "name": "search_web",
429
- "description": "Search the web via DuckDuckGo. Returns top snippets.",
430
  "parameters": {
431
  "type": "object",
432
  "properties": {"query": {"type": "string"}},
@@ -438,7 +435,7 @@ class BasicAgent:
438
  "type": "function",
439
  "function": {
440
  "name": "fetch_webpage",
441
- "description": "Fetch and read the full text content of any URL.",
442
  "parameters": {
443
  "type": "object",
444
  "properties": {"url": {"type": "string"}},
@@ -451,8 +448,8 @@ class BasicAgent:
451
  "function": {
452
  "name": "fetch_wikipedia",
453
  "description": (
454
- "Fetch a Wikipedia article by exact title. "
455
- "Always use this instead of fetch_webpage for Wikipedia."
456
  ),
457
  "parameters": {
458
  "type": "object",
@@ -492,152 +489,141 @@ class BasicAgent:
492
 
493
  SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
494
 
495
- MANDATORY WORKFLOW β€” follow every step, no exceptions:
496
-
497
- STEP 1 β€” Always call check_file(task_id) first, regardless of the question.
498
- β€’ If NO_FILE β†’ go to STEP 2.
499
- β€’ If FILE_EXISTS image β†’ call analyse_image(task_id, full_question).
500
- β€’ If FILE_EXISTS python β†’ call run_python_file(task_id). The output IS the answer.
501
- β€’ If FILE_EXISTS excel/xlsx/csv β†’ call read_excel_file(task_id, question).
502
- β€’ If FILE_EXISTS audio β†’ call transcribe_audio(task_id), then answer from transcript.
503
- β€’ If FILE_EXISTS text/pdf β†’ call read_text_file(task_id), then answer from content.
504
- CRITICAL: NEVER return "NO_FILE" or any tool status string as your final answer.
505
-
506
- STEP 2 β€” Gather information using tools.
507
- β€’ YouTube URL in question β†’ call youtube_transcript(url) first.
508
- If BLOCKED β†’ use search_web("video title + key phrase") to find the answer.
509
- β€’ Wikipedia question β†’ call fetch_wikipedia("Exact Article Title").
510
- For discography β†’ look at Studio albums table. Count ONLY solo studio albums.
511
- Do NOT count: collaborations, live albums, compilations, EPs.
512
- β€’ LibreTexts 1.E Exercises β†’ fetch_webpage with EXACT URL:
513
  https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
514
- β€’ Wikipedia Featured Articles β†’ fetch_webpage:
515
- https://en.wikipedia.org/wiki/Wikipedia:Featured_articles_promoted_in_2016
516
- Then search for the specific article's nomination page.
517
- β€’ Sports stats β†’ search_web("player name stat year site:baseball-reference.com")
518
- then fetch_webpage the result URL for exact numbers.
519
- β€’ For ANY other factual question β†’ search_web, then fetch_webpage top result.
520
-
521
- STEP 3 β€” If first search fails, try different search terms. Try at least 2-3
522
- different approaches before giving up. Never say "I was unable to find."
523
-
524
- STEP 4 β€” Answer format:
525
- β€’ Return ONLY the final value. No explanation. No "The answer is".
526
- β€’ Numbers: just the number (e.g. "3" not "3 albums").
527
- β€’ Names: just the name.
528
- β€’ Yes/No: just "yes" or "no".
529
- β€’ Lists: comma-separated values."""
530
 
531
  # ── main call ─────────────────────────────────────────────────────────────
532
 
533
  def __call__(self, question: str, task_id: str = "") -> str:
534
  print(f"β–Ά Task {task_id[:8]}: {question[:80]}")
535
 
536
- # Pre-attach image to messages if task has an image file
537
- fb, ct = self._fetch_file(task_id)
538
- ct_clean = (ct or "").split(";")[0].strip().lower()
539
-
540
- user_content = []
541
- if fb and "image" in ct_clean:
542
- b64 = base64.b64encode(fb).decode()
543
- user_content.append({
544
- "type": "image_url",
545
- "image_url": {"url": f"data:{ct_clean};base64,{b64}",
546
- "detail": "high"},
547
- })
548
-
549
- user_content.append({
550
- "type": "text",
551
- "text": f"task_id: {task_id}\n\nTask: {question}",
552
- })
553
-
554
  messages = [
555
  {"role": "system", "content": self.SYSTEM},
556
- {"role": "user", "content": user_content},
 
 
 
557
  ]
558
 
 
 
 
 
 
 
559
  for _round in range(10):
560
  try:
561
- resp = self.client.chat.completions.create(
562
- model="gpt-4o",
563
  messages=messages,
564
  tools=self.TOOLS,
565
  tool_choice="auto",
566
- temperature=0,
567
  max_tokens=1500,
 
568
  )
569
  except Exception as e:
570
- print(f" OpenAI error: {e}")
571
- return "Error."
 
 
 
 
 
 
 
 
 
 
572
 
573
  msg = resp.choices[0].message
 
574
 
575
- # No tool calls β†’ we have the answer
576
- if not msg.tool_calls:
577
  answer = (msg.content or "").strip()
578
- # Reject bad answers
579
- bad = ("no_file", "file_exists", "i was unable",
580
- "i couldn't", "i can't access", "please provide",
581
- "you might want", "i'm unable")
582
- if any(b in answer.lower() for b in bad):
583
- # Force a retry with a harder nudge
584
- messages.append({
585
- "role": "assistant",
586
- "content": answer,
587
- })
588
  messages.append({
589
  "role": "user",
590
  "content": (
591
- "That answer is not acceptable. "
592
- "Use your tools to find the real answer. "
593
- "Try search_web or fetch_wikipedia. "
594
- "Return ONLY the final value."
595
  ),
596
  })
597
  continue
598
  return answer
599
 
600
- # Append assistant turn
601
  messages.append({
602
  "role": "assistant",
603
- "content": msg.content,
604
  "tool_calls": [
605
  {
606
  "id": tc.id,
607
  "type": "function",
608
  "function": {
609
  "name": tc.function.name,
610
- "arguments": tc.function.arguments,
 
 
611
  },
612
  }
613
- for tc in msg.tool_calls
614
  ],
615
  })
616
 
617
  # Execute tools
618
- for tc in msg.tool_calls:
619
  fn = tc.function.name
620
  try:
621
- args = json.loads(tc.function.arguments)
 
 
 
 
 
622
  except Exception:
623
  args = {}
 
624
  result = self._dispatch(fn, args, task_id, question)
625
- print(f" {fn}({list(args.values())[:1]}) β†’ {str(result)[:80]}")
 
626
  messages.append({
627
  "role": "tool",
628
  "tool_call_id": tc.id,
629
  "content": result or "Empty result.",
630
  })
631
 
632
- # Force final answer
633
  try:
634
  messages.append({
635
  "role": "user",
636
- "content": "Final answer only – just the value, no explanation.",
637
  })
638
- resp = self.client.chat.completions.create(
639
- model="gpt-4o", messages=messages,
640
- temperature=0, max_tokens=100,
641
  )
642
  return (resp.choices[0].message.content or "").strip()
643
  except Exception:
@@ -675,7 +661,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
675
  answer = agent(question_text, task_id=task_id)
676
  except Exception as e:
677
  answer = f"Error: {e}"
678
- print(f" β†’ Answer: {answer[:60]}")
679
 
680
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
681
  results_log.append({
@@ -709,10 +695,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
709
 
710
 
711
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
712
- gr.Markdown("# πŸ€– GAIA Agent Evaluation")
713
  gr.Markdown(
714
- "Handles: images Β· Python execution Β· Excel Β· audio transcription Β· "
715
- "Wikipedia Β· YouTube Β· web search"
 
716
  )
717
  gr.LoginButton()
718
  run_button = gr.Button("πŸš€ Run Evaluation & Submit", variant="primary")
 
7
  import requests
8
  import pandas as pd
9
  import gradio as gr
10
+ from huggingface_hub import InferenceClient
11
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
+ # Free HF model — best available for tool-calling
15
+ HF_MODEL = "Qwen/Qwen2.5-72B-Instruct"
16
+
17
 
18
  # ── helpers ───────────────────────────────────────────────────────────────────
19
 
 
48
 
49
  class BasicAgent:
50
  def __init__(self):
51
+ hf_token = os.getenv("HF_TOKEN")
52
+ if not hf_token:
53
+ raise ValueError(
54
+ "HF_TOKEN missing. Add your Hugging Face token to Space Secrets."
55
+ )
56
+ self.client = InferenceClient(
57
+ model=HF_MODEL,
58
+ token=hf_token,
59
+ )
60
  self.api_url = DEFAULT_API_URL
61
+ print(f"βœ… Agent initialised with model: {HF_MODEL}")
62
 
63
  # ── raw file fetch ────────────────────────────────────────────────────────
64
 
 
72
  pass
73
  return None, ""
74
 
75
+ # ── tool implementations ──────────────────────────────────────────────────
76
 
77
  def tool_check_file(self, task_id: str) -> str:
 
78
  fb, ct = self._fetch_file(task_id)
79
  if not fb:
80
  return "NO_FILE"
81
  ct_clean = ct.split(";")[0].strip().lower()
82
  return (
83
  f"FILE_EXISTS type={ct_clean} size={len(fb)}_bytes. "
84
+ f"Use the right tool: image→analyse_image, python→run_python_file, "
 
85
  f"excel/xlsxβ†’read_excel_file, audioβ†’transcribe_audio, "
86
  f"text/pdfβ†’read_text_file."
87
  )
88
 
89
  def tool_analyse_image(self, task_id: str, question: str) -> str:
90
+ """Describe/analyse image using HF vision model."""
91
  fb, ct = self._fetch_file(task_id)
92
  if not fb:
93
  return "No image found."
94
+ ct_clean = ct.split(";")[0].strip().lower()
95
  if "image" not in ct_clean:
96
  return f"File is not an image (type={ct_clean})."
97
  b64 = base64.b64encode(fb).decode()
98
+
99
+ # Use a vision-capable model via InferenceClient
100
+ vision_client = InferenceClient(
101
+ model="Qwen/Qwen2.5-VL-72B-Instruct",
102
+ token=os.getenv("HF_TOKEN"),
 
 
 
 
 
 
 
 
103
  )
104
+ try:
105
+ result = vision_client.chat_completion(
106
+ messages=[{
107
+ "role": "user",
108
+ "content": [
109
+ {
110
+ "type": "image_url",
111
+ "image_url": {
112
+ "url": f"data:{ct_clean};base64,{b64}"
113
+ },
114
+ },
115
+ {"type": "text", "text": question},
116
+ ],
117
+ }],
118
+ max_tokens=800,
119
+ )
120
+ return result.choices[0].message.content or "No response."
121
+ except Exception as e:
122
+ # Fallback to text-only description attempt
123
+ return f"Vision error: {e}. Try describing from context."
124
 
125
  def tool_run_python_file(self, task_id: str) -> str:
126
+ """Download and execute Python file, return stdout."""
127
+ fb, _ = self._fetch_file(task_id)
128
  if not fb:
129
  return "No file found."
130
  code = fb.decode("utf-8", errors="ignore")
131
  try:
132
+ with tempfile.NamedTemporaryFile(
133
+ suffix=".py", delete=False, mode="w"
134
+ ) as f:
135
  f.write(code)
136
  fname = f.name
137
  result = subprocess.run(
138
  ["python3", fname],
139
+ capture_output=True, text=True, timeout=30,
140
  )
141
  out = result.stdout.strip()
142
  err = result.stderr.strip()
143
+ return f"STDOUT:\n{out}" if out else f"STDERR:\n{err}" if err else "No output."
 
 
 
 
144
  except Exception as e:
145
  return f"Execution error: {e}"
146
 
147
  def tool_read_excel_file(self, task_id: str, question: str) -> str:
148
+ """Load Excel/CSV and answer a question about it."""
149
  fb, ct = self._fetch_file(task_id)
150
  if not fb:
151
  return "No file found."
152
  try:
153
  import io
154
  ct_clean = ct.split(";")[0].strip().lower()
155
+ df = (
156
+ pd.read_csv(io.BytesIO(fb))
157
+ if ("csv" in ct_clean or "text" in ct_clean)
158
+ else pd.read_excel(io.BytesIO(fb))
159
+ )
160
+ preview = df.to_string(max_rows=80, max_cols=20)
161
+ # Ask the LLM inline (no extra API call – just return data+question)
162
+ return (
163
+ f"SPREADSHEET DATA:\n{preview}\n\n"
164
+ f"Answer the following about this data: {question}"
 
 
 
 
 
 
 
 
165
  )
 
166
  except Exception as e:
167
  return f"Excel read error: {e}"
168
 
169
  def tool_transcribe_audio(self, task_id: str) -> str:
170
+ """Transcribe audio using HF Whisper."""
171
  fb, ct = self._fetch_file(task_id)
172
  if not fb:
173
  return "No file found."
174
  try:
 
175
  ct_clean = ct.split(";")[0].strip().lower()
176
  ext_map = {
177
  "audio/mpeg": ".mp3", "audio/mp3": ".mp3",
 
183
  with tempfile.NamedTemporaryFile(suffix=ext, delete=False) as f:
184
  f.write(fb)
185
  fname = f.name
186
+
187
+ asr_client = InferenceClient(
188
+ model="openai/whisper-large-v3",
189
+ token=os.getenv("HF_TOKEN"),
190
+ )
191
  with open(fname, "rb") as audio_f:
192
+ result = asr_client.automatic_speech_recognition(audio_f)
193
+ return result.text if hasattr(result, "text") else str(result)
 
 
194
  except Exception as e:
195
  return f"Transcription error: {e}"
196
 
197
  def tool_read_text_file(self, task_id: str) -> str:
 
198
  fb, ct = self._fetch_file(task_id)
199
  if not fb:
200
  return "No file found."
201
  try:
202
  ct_clean = ct.split(";")[0].strip().lower()
203
  if "pdf" in ct_clean:
 
204
  try:
205
  import pdfminer.high_level
206
  import io
207
+ return pdfminer.high_level.extract_text(io.BytesIO(fb))[:6000]
 
208
  except ImportError:
209
  pass
210
  return fb.decode("utf-8", errors="ignore")[:6000]
 
212
  return f"Read error: {e}"
213
 
214
  def tool_search_web(self, query: str) -> str:
 
215
  try:
216
  hdrs = {
217
  "User-Agent": (
218
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
219
+ "AppleWebKit/537.36 Chrome/124.0 Safari/537.36"
 
220
  )
221
  }
222
  r = requests.get(
 
265
  return f"Fetch error: {e}"
266
 
267
  def tool_fetch_wikipedia(self, title: str) -> str:
 
268
  try:
269
  slug = requests.utils.quote(title.replace(" ", "_"))
270
  r = requests.get(
 
272
  timeout=12,
273
  )
274
  if r.status_code == 200:
275
+ return r.json().get("extract", "Not found.")
 
 
276
  r2 = requests.get(
277
  "https://en.wikipedia.org/w/api.php",
278
  params={
 
304
  ("blocked", "ip", "cloud", "requestblocked", "ipblocked")):
305
  return (
306
  "BLOCKED: YouTube blocks cloud IPs. "
307
+ "Use search_web to find transcript or description of this video."
 
308
  )
309
  return f"Transcript error: {err}"
310
 
 
317
  "name": "check_file",
318
  "description": (
319
  "ALWAYS call this first. Checks if a file is attached to the task. "
320
+ "Returns NO_FILE or the file type and which tool to use next."
321
  ),
322
  "parameters": {
323
  "type": "object",
 
331
  "function": {
332
  "name": "analyse_image",
333
  "description": (
334
+ "Analyse an image file attached to the task using a vision model. "
335
  "Use for chess boards, diagrams, photos, screenshots."
336
  ),
337
  "parameters": {
338
  "type": "object",
339
  "properties": {
340
  "task_id": {"type": "string"},
341
+ "question": {
342
+ "type": "string",
343
+ "description": "What to find or answer from the image.",
344
+ },
345
  },
346
  "required": ["task_id", "question"],
347
  },
 
353
  "name": "run_python_file",
354
  "description": (
355
  "Execute the Python file attached to the task and return its output. "
356
+ "The stdout IS the answer."
357
  ),
358
  "parameters": {
359
  "type": "object",
 
366
  "type": "function",
367
  "function": {
368
  "name": "read_excel_file",
369
+ "description": "Read an Excel or CSV file and answer a question about its data.",
 
 
 
370
  "parameters": {
371
  "type": "object",
372
  "properties": {
 
382
  "function": {
383
  "name": "transcribe_audio",
384
  "description": (
385
+ "Transcribe an audio file using Whisper. "
386
  "Use for voice memos, recordings, audio questions."
387
  ),
388
  "parameters": {
 
409
  "function": {
410
  "name": "youtube_transcript",
411
  "description": (
412
+ "Fetch YouTube video transcript. "
413
+ "If cloud-blocked, use search_web instead."
414
  ),
415
  "parameters": {
416
  "type": "object",
 
423
  "type": "function",
424
  "function": {
425
  "name": "search_web",
426
+ "description": "Search the web via DuckDuckGo. Returns top result snippets.",
427
  "parameters": {
428
  "type": "object",
429
  "properties": {"query": {"type": "string"}},
 
435
  "type": "function",
436
  "function": {
437
  "name": "fetch_webpage",
438
+ "description": "Fetch and read the full text of any URL.",
439
  "parameters": {
440
  "type": "object",
441
  "properties": {"url": {"type": "string"}},
 
448
  "function": {
449
  "name": "fetch_wikipedia",
450
  "description": (
451
+ "Fetch a Wikipedia article by exact title via REST API. "
452
+ "Always prefer this over fetch_webpage for Wikipedia."
453
  ),
454
  "parameters": {
455
  "type": "object",
 
489
 
490
  SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
491
 
492
+ MANDATORY WORKFLOW:
493
+
494
+ STEP 1 β€” Call check_file(task_id) first for every task.
495
+ β€’ NO_FILE β†’ go to STEP 2.
496
+ β€’ image file β†’ call analyse_image(task_id, question).
497
+ β€’ python file β†’ call run_python_file(task_id). Its output IS the answer.
498
+ β€’ excel/csv file β†’ call read_excel_file(task_id, question).
499
+ β€’ audio file β†’ call transcribe_audio(task_id), then answer from transcript.
500
+ β€’ text/pdf file β†’ call read_text_file(task_id), then answer from content.
501
+ NEVER return "NO_FILE" or tool status strings as your final answer.
502
+
503
+ STEP 2 β€” Gather information.
504
+ β€’ YouTube URL β†’ call youtube_transcript(url). If BLOCKED β†’ search_web.
505
+ β€’ Wikipedia question β†’ fetch_wikipedia("Exact Article Title").
506
+ Discography β†’ count ONLY solo studio albums (not collaborations/live/EP).
507
+ β€’ LibreTexts 1.E β†’ fetch_webpage:
 
 
508
  https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
509
+ β€’ Sports stats β†’ search_web then fetch_webpage for exact numbers.
510
+ β€’ Any other question β†’ search_web, then fetch_webpage for details.
511
+
512
+ STEP 3 β€” Try at least 2-3 different search queries before concluding.
513
+ Never say "I was unable to find." Always use tools to find the answer.
514
+
515
+ STEP 4 β€” Final answer: ONLY the value. No explanation. No preamble.
516
+ Numbers: just digits. Names: just the name. Lists: comma-separated."""
 
 
 
 
 
 
 
 
517
 
518
  # ── main call ─────────────────────────────────────────────────────────────
519
 
520
  def __call__(self, question: str, task_id: str = "") -> str:
521
  print(f"β–Ά Task {task_id[:8]}: {question[:80]}")
522
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
523
  messages = [
524
  {"role": "system", "content": self.SYSTEM},
525
+ {
526
+ "role": "user",
527
+ "content": f"task_id: {task_id}\n\nTask: {question}",
528
+ },
529
  ]
530
 
531
+ bad_phrases = (
532
+ "no_file", "file_exists", "i was unable", "i couldn't",
533
+ "i can't access", "please provide", "you might want",
534
+ "i'm unable", "i cannot", "i am unable",
535
+ )
536
+
537
  for _round in range(10):
538
  try:
539
+ resp = self.client.chat_completion(
 
540
  messages=messages,
541
  tools=self.TOOLS,
542
  tool_choice="auto",
 
543
  max_tokens=1500,
544
+ temperature=0.1,
545
  )
546
  except Exception as e:
547
+ print(f" HF API error: {e}")
548
+ # Retry without tools if tool_choice unsupported
549
+ try:
550
+ resp = self.client.chat_completion(
551
+ messages=messages,
552
+ max_tokens=500,
553
+ temperature=0.1,
554
+ )
555
+ return (resp.choices[0].message.content or "").strip()
556
+ except Exception as e2:
557
+ print(f" Fallback error: {e2}")
558
+ return "Error."
559
 
560
  msg = resp.choices[0].message
561
+ tool_calls = getattr(msg, "tool_calls", None)
562
 
563
+ # No tool calls β†’ final answer
564
+ if not tool_calls:
565
  answer = (msg.content or "").strip()
566
+ if any(b in answer.lower() for b in bad_phrases):
567
+ messages.append({"role": "assistant", "content": answer})
 
 
 
 
 
 
 
 
568
  messages.append({
569
  "role": "user",
570
  "content": (
571
+ "That is not acceptable. Use your tools to find the "
572
+ "real answer. Return ONLY the final value."
 
 
573
  ),
574
  })
575
  continue
576
  return answer
577
 
578
+ # Append assistant message with tool calls
579
  messages.append({
580
  "role": "assistant",
581
+ "content": msg.content or "",
582
  "tool_calls": [
583
  {
584
  "id": tc.id,
585
  "type": "function",
586
  "function": {
587
  "name": tc.function.name,
588
+ "arguments": tc.function.arguments
589
+ if isinstance(tc.function.arguments, str)
590
+ else json.dumps(tc.function.arguments),
591
  },
592
  }
593
+ for tc in tool_calls
594
  ],
595
  })
596
 
597
  # Execute tools
598
+ for tc in tool_calls:
599
  fn = tc.function.name
600
  try:
601
+ raw_args = tc.function.arguments
602
+ args = (
603
+ json.loads(raw_args)
604
+ if isinstance(raw_args, str)
605
+ else raw_args
606
+ )
607
  except Exception:
608
  args = {}
609
+
610
  result = self._dispatch(fn, args, task_id, question)
611
+ print(f" {fn} β†’ {str(result)[:80]}")
612
+
613
  messages.append({
614
  "role": "tool",
615
  "tool_call_id": tc.id,
616
  "content": result or "Empty result.",
617
  })
618
 
619
+ # Force final answer after max rounds
620
  try:
621
  messages.append({
622
  "role": "user",
623
+ "content": "Final answer only β€” just the value, no explanation.",
624
  })
625
+ resp = self.client.chat_completion(
626
+ messages=messages, max_tokens=100, temperature=0.1,
 
627
  )
628
  return (resp.choices[0].message.content or "").strip()
629
  except Exception:
 
661
  answer = agent(question_text, task_id=task_id)
662
  except Exception as e:
663
  answer = f"Error: {e}"
664
+ print(f" β†’ {answer[:60]}")
665
 
666
  answers_payload.append({"task_id": task_id, "submitted_answer": answer})
667
  results_log.append({
 
695
 
696
 
697
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
698
+ gr.Markdown("# πŸ€– GAIA Agent β€” Free HuggingFace Models")
699
  gr.Markdown(
700
+ f"**LLM:** `{HF_MODEL}` (free via HF Inference API) \n"
701
+ "**Vision:** `Qwen/Qwen2.5-VL-72B-Instruct` \n"
702
+ "**ASR:** `openai/whisper-large-v3`"
703
  )
704
  gr.LoginButton()
705
  run_button = gr.Button("πŸš€ Run Evaluation & Submit", variant="primary")