bhotta committed on
Commit
d91971a
·
verified ·
1 Parent(s): 9c7a095

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -233
app.py CHANGED
@@ -8,25 +8,11 @@ import requests
8
  import pandas as pd
9
  import gradio as gr
10
  from huggingface_hub import InferenceClient
 
11
 
12
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
13
 
14
- # Free HF model — best available for tool-calling
15
- HF_MODEL = "Qwen/Qwen2.5-72B-Instruct"
16
-
17
-
18
  # ── helpers ───────────────────────────────────────────────────────────────────
19
- def _get_hf_token():
20
- """
21
- HF Spaces automatically injects the token under several variable names.
22
- We try all of them. No manual secret needed.
23
- """
24
- for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
25
- token = os.getenv(var, "").strip()
26
- if token:
27
- return token
28
- return None
29
-
30
  def _strip_html(html: str) -> str:
31
  from html.parser import HTMLParser
32
 
@@ -58,14 +44,26 @@ def _strip_html(html: str) -> str:
58
 
59
  class BasicAgent:
60
  def __init__(self):
61
- hf_token = _get_hf_token()
62
- self.hf_token = hf_token
63
- self.client = InferenceClient(
64
- model=HF_MODEL,
65
- token=hf_token,
66
  )
 
 
 
 
 
 
 
67
  self.api_url = DEFAULT_API_URL
68
- print(f"βœ… Agent initialised with model: {HF_MODEL}")
 
 
 
 
 
 
 
69
 
70
  # ── raw file fetch ────────────────────────────────────────────────────────
71
 
@@ -94,7 +92,7 @@ class BasicAgent:
94
  )
95
 
96
  def tool_analyse_image(self, task_id: str, question: str) -> str:
97
- """Describe/analyse image using HF vision model."""
98
  fb, ct = self._fetch_file(task_id)
99
  if not fb:
100
  return "No image found."
@@ -103,31 +101,38 @@ class BasicAgent:
103
  return f"File is not an image (type={ct_clean})."
104
  b64 = base64.b64encode(fb).decode()
105
 
106
- # Use a vision-capable model via InferenceClient
107
- vision_client = InferenceClient(
108
- model="Qwen/Qwen2.5-VL-72B-Instruct",
109
- token=self.hf_token,
110
- )
 
 
 
 
 
111
  try:
112
- result = vision_client.chat_completion(
 
 
113
  messages=[{
114
  "role": "user",
115
  "content": [
116
  {
117
- "type": "image_url",
118
- "image_url": {
119
- "url": f"data:{ct_clean};base64,{b64}"
 
 
120
  },
121
  },
122
  {"type": "text", "text": question},
123
  ],
124
  }],
125
- max_tokens=800,
126
  )
127
- return result.choices[0].message.content or "No response."
128
  except Exception as e:
129
- # Fallback to text-only description attempt
130
- return f"Vision error: {e}. Try describing from context."
131
 
132
  def tool_run_python_file(self, task_id: str) -> str:
133
  """Download and execute Python file, return stdout."""
@@ -165,7 +170,6 @@ class BasicAgent:
165
  else pd.read_excel(io.BytesIO(fb))
166
  )
167
  preview = df.to_string(max_rows=80, max_cols=20)
168
- # Ask the LLM inline (no extra API call – just return data+question)
169
  return (
170
  f"SPREADSHEET DATA:\n{preview}\n\n"
171
  f"Answer the following about this data: {question}"
@@ -174,7 +178,7 @@ class BasicAgent:
174
  return f"Excel read error: {e}"
175
 
176
  def tool_transcribe_audio(self, task_id: str) -> str:
177
- """Transcribe audio using HF Whisper."""
178
  fb, ct = self._fetch_file(task_id)
179
  if not fb:
180
  return "No file found."
@@ -191,13 +195,16 @@ class BasicAgent:
191
  f.write(fb)
192
  fname = f.name
193
 
194
- asr_client = InferenceClient(
195
- model="openai/whisper-large-v3",
196
- token=self.hf_token,
197
- )
198
- with open(fname, "rb") as audio_f:
199
- result = asr_client.automatic_speech_recognition(audio_f)
200
- return result.text if hasattr(result, "text") else str(result)
 
 
 
201
  except Exception as e:
202
  return f"Transcription error: {e}"
203
 
@@ -315,154 +322,124 @@ class BasicAgent:
315
  )
316
  return f"Transcript error: {err}"
317
 
318
- # ── tool dispatch ─────────────────────────────────────────────────────────
319
 
320
  TOOLS = [
321
  {
322
- "type": "function",
323
- "function": {
324
- "name": "check_file",
325
- "description": (
326
- "ALWAYS call this first. Checks if a file is attached to the task. "
327
- "Returns NO_FILE or the file type and which tool to use next."
328
- ),
329
- "parameters": {
330
- "type": "object",
331
- "properties": {"task_id": {"type": "string"}},
332
- "required": ["task_id"],
333
- },
334
  },
335
  },
336
  {
337
- "type": "function",
338
- "function": {
339
- "name": "analyse_image",
340
- "description": (
341
- "Analyse an image file attached to the task using a vision model. "
342
- "Use for chess boards, diagrams, photos, screenshots."
343
- ),
344
- "parameters": {
345
- "type": "object",
346
- "properties": {
347
- "task_id": {"type": "string"},
348
- "question": {
349
- "type": "string",
350
- "description": "What to find or answer from the image.",
351
- },
352
  },
353
- "required": ["task_id", "question"],
354
  },
 
355
  },
356
  },
357
  {
358
- "type": "function",
359
- "function": {
360
- "name": "run_python_file",
361
- "description": (
362
- "Execute the Python file attached to the task and return its output. "
363
- "The stdout IS the answer."
364
- ),
365
- "parameters": {
366
- "type": "object",
367
- "properties": {"task_id": {"type": "string"}},
368
- "required": ["task_id"],
369
- },
370
  },
371
  },
372
  {
373
- "type": "function",
374
- "function": {
375
- "name": "read_excel_file",
376
- "description": "Read an Excel or CSV file and answer a question about its data.",
377
- "parameters": {
378
- "type": "object",
379
- "properties": {
380
- "task_id": {"type": "string"},
381
- "question": {"type": "string"},
382
- },
383
- "required": ["task_id", "question"],
384
  },
 
385
  },
386
  },
387
  {
388
- "type": "function",
389
- "function": {
390
- "name": "transcribe_audio",
391
- "description": (
392
- "Transcribe an audio file using Whisper. "
393
- "Use for voice memos, recordings, audio questions."
394
- ),
395
- "parameters": {
396
- "type": "object",
397
- "properties": {"task_id": {"type": "string"}},
398
- "required": ["task_id"],
399
- },
400
  },
401
  },
402
  {
403
- "type": "function",
404
- "function": {
405
- "name": "read_text_file",
406
- "description": "Read a text or PDF file attached to the task.",
407
- "parameters": {
408
- "type": "object",
409
- "properties": {"task_id": {"type": "string"}},
410
- "required": ["task_id"],
411
- },
412
  },
413
  },
414
  {
415
- "type": "function",
416
- "function": {
417
- "name": "youtube_transcript",
418
- "description": (
419
- "Fetch YouTube video transcript. "
420
- "If cloud-blocked, use search_web instead."
421
- ),
422
- "parameters": {
423
- "type": "object",
424
- "properties": {"video_url": {"type": "string"}},
425
- "required": ["video_url"],
426
- },
427
  },
428
  },
429
  {
430
- "type": "function",
431
- "function": {
432
- "name": "search_web",
433
- "description": "Search the web via DuckDuckGo. Returns top result snippets.",
434
- "parameters": {
435
- "type": "object",
436
- "properties": {"query": {"type": "string"}},
437
- "required": ["query"],
438
- },
439
  },
440
  },
441
  {
442
- "type": "function",
443
- "function": {
444
- "name": "fetch_webpage",
445
- "description": "Fetch and read the full text of any URL.",
446
- "parameters": {
447
- "type": "object",
448
- "properties": {"url": {"type": "string"}},
449
- "required": ["url"],
450
- },
451
  },
452
  },
453
  {
454
- "type": "function",
455
- "function": {
456
- "name": "fetch_wikipedia",
457
- "description": (
458
- "Fetch a Wikipedia article by exact title via REST API. "
459
- "Always prefer this over fetch_webpage for Wikipedia."
460
- ),
461
- "parameters": {
462
- "type": "object",
463
- "properties": {"title": {"type": "string"}},
464
- "required": ["title"],
465
- },
466
  },
467
  },
468
  ]
@@ -495,9 +472,7 @@ class BasicAgent:
495
  # ── system prompt ─────────────────────────────────────────────────────────
496
 
497
  SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
498
-
499
  MANDATORY WORKFLOW:
500
-
501
  STEP 1 β€” Call check_file(task_id) first for every task.
502
  β€’ NO_FILE β†’ go to STEP 2.
503
  β€’ image file β†’ call analyse_image(task_id, question).
@@ -506,7 +481,6 @@ STEP 1 β€” Call check_file(task_id) first for every task.
506
  β€’ audio file β†’ call transcribe_audio(task_id), then answer from transcript.
507
  β€’ text/pdf file β†’ call read_text_file(task_id), then answer from content.
508
  NEVER return "NO_FILE" or tool status strings as your final answer.
509
-
510
  STEP 2 β€” Gather information.
511
  β€’ YouTube URL β†’ call youtube_transcript(url). If BLOCKED β†’ search_web.
512
  β€’ Wikipedia question β†’ fetch_wikipedia("Exact Article Title").
@@ -515,10 +489,8 @@ STEP 2 β€” Gather information.
515
  https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
516
  β€’ Sports stats β†’ search_web then fetch_webpage for exact numbers.
517
  β€’ Any other question β†’ search_web, then fetch_webpage for details.
518
-
519
  STEP 3 β€” Try at least 2-3 different search queries before concluding.
520
  Never say "I was unable to find." Always use tools to find the answer.
521
-
522
  STEP 4 β€” Final answer: ONLY the value. No explanation. No preamble.
523
  Numbers: just digits. Names: just the name. Lists: comma-separated."""
524
 
@@ -528,7 +500,6 @@ STEP 4 β€” Final answer: ONLY the value. No explanation. No preamble.
528
  print(f"β–Ά Task {task_id[:8]}: {question[:80]}")
529
 
530
  messages = [
531
- {"role": "system", "content": self.SYSTEM},
532
  {
533
  "role": "user",
534
  "content": f"task_id: {task_id}\n\nTask: {question}",
@@ -543,35 +514,31 @@ STEP 4 β€” Final answer: ONLY the value. No explanation. No preamble.
543
 
544
  for _round in range(10):
545
  try:
546
- resp = self.client.chat_completion(
547
- messages=messages,
548
- tools=self.TOOLS,
549
- tool_choice="auto",
550
  max_tokens=1500,
551
- temperature=0.1,
 
 
552
  )
553
  except Exception as e:
554
- print(f" HF API error: {e}")
555
- # Retry without tools if tool_choice unsupported
556
- try:
557
- resp = self.client.chat_completion(
558
- messages=messages,
559
- max_tokens=500,
560
- temperature=0.1,
561
- )
562
- return (resp.choices[0].message.content or "").strip()
563
- except Exception as e2:
564
- print(f" Fallback error: {e2}")
565
- return "Error."
566
-
567
- msg = resp.choices[0].message
568
- tool_calls = getattr(msg, "tool_calls", None)
569
-
570
- # No tool calls → final answer
571
- if not tool_calls:
572
- answer = (msg.content or "").strip()
573
  if any(b in answer.lower() for b in bad_phrases):
574
- messages.append({"role": "assistant", "content": answer})
575
  messages.append({
576
  "role": "user",
577
  "content": (
@@ -582,57 +549,35 @@ STEP 4 β€” Final answer: ONLY the value. No explanation. No preamble.
582
  continue
583
  return answer
584
 
585
- # Append assistant message with tool calls
586
- messages.append({
587
- "role": "assistant",
588
- "content": msg.content or "",
589
- "tool_calls": [
590
- {
591
- "id": tc.id,
592
- "type": "function",
593
- "function": {
594
- "name": tc.function.name,
595
- "arguments": tc.function.arguments
596
- if isinstance(tc.function.arguments, str)
597
- else json.dumps(tc.function.arguments),
598
- },
599
- }
600
- for tc in tool_calls
601
- ],
602
- })
603
-
604
- # Execute tools
605
- for tc in tool_calls:
606
- fn = tc.function.name
607
- try:
608
- raw_args = tc.function.arguments
609
- args = (
610
- json.loads(raw_args)
611
- if isinstance(raw_args, str)
612
- else raw_args
613
- )
614
- except Exception:
615
- args = {}
616
-
617
  result = self._dispatch(fn, args, task_id, question)
618
  print(f" {fn} β†’ {str(result)[:80]}")
619
-
620
- messages.append({
621
- "role": "tool",
622
- "tool_call_id": tc.id,
623
  "content": result or "Empty result.",
624
  })
625
 
 
 
626
  # Force final answer after max rounds
627
  try:
628
  messages.append({
629
  "role": "user",
630
  "content": "Final answer only β€” just the value, no explanation.",
631
  })
632
- resp = self.client.chat_completion(
633
- messages=messages, max_tokens=100, temperature=0.1,
 
 
 
634
  )
635
- return (resp.choices[0].message.content or "").strip()
 
636
  except Exception:
637
  return "Error."
638
 
@@ -702,11 +647,11 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
702
 
703
 
704
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
705
- gr.Markdown("# πŸ€– GAIA Agent β€” Free HuggingFace Models")
706
  gr.Markdown(
707
- f"**LLM:** `{HF_MODEL}` (free via HF Inference API) \n"
708
- "**Vision:** `Qwen/Qwen2.5-VL-72B-Instruct` \n"
709
- "**ASR:** `openai/whisper-large-v3`"
710
  )
711
  gr.LoginButton()
712
  run_button = gr.Button("πŸš€ Run Evaluation & Submit", variant="primary")
 
8
  import pandas as pd
9
  import gradio as gr
10
  from huggingface_hub import InferenceClient
11
+ import anthropic
12
 
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
 
 
 
 
15
  # ── helpers ───────────────────────────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
16
  def _strip_html(html: str) -> str:
17
  from html.parser import HTMLParser
18
 
 
44
 
45
  class BasicAgent:
46
  def __init__(self):
47
+ # Use Anthropic API — no HF credits needed
48
+ self.anthropic_client = anthropic.Anthropic(
49
+ api_key=os.environ.get("ANTHROPIC_API_KEY", "")
 
 
50
  )
51
+ self.model = "claude-sonnet-4-20250514"
52
+
53
+ # Keep HF client only for Whisper ASR (free, no Inference Provider needed)
54
+ hf_token = self._get_hf_token()
55
+ self.hf_token = hf_token
56
+ self.hf_client = InferenceClient(token=hf_token) if hf_token else None
57
+
58
  self.api_url = DEFAULT_API_URL
59
+ print(f"βœ… Agent initialised with model: {self.model}")
60
+
61
+ def _get_hf_token(self):
62
+ for var in ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN"):
63
+ token = os.getenv(var, "").strip()
64
+ if token:
65
+ return token
66
+ return None
67
 
68
  # ── raw file fetch ────────────────────────────────────────────────────────
69
 
 
92
  )
93
 
94
  def tool_analyse_image(self, task_id: str, question: str) -> str:
95
+ """Analyse image using Claude's vision."""
96
  fb, ct = self._fetch_file(task_id)
97
  if not fb:
98
  return "No image found."
 
101
  return f"File is not an image (type={ct_clean})."
102
  b64 = base64.b64encode(fb).decode()
103
 
104
+ # Map content type to Anthropic media type
105
+ media_map = {
106
+ "image/jpeg": "image/jpeg",
107
+ "image/jpg": "image/jpeg",
108
+ "image/png": "image/png",
109
+ "image/gif": "image/gif",
110
+ "image/webp": "image/webp",
111
+ }
112
+ media_type = media_map.get(ct_clean, "image/jpeg")
113
+
114
  try:
115
+ response = self.anthropic_client.messages.create(
116
+ model=self.model,
117
+ max_tokens=800,
118
  messages=[{
119
  "role": "user",
120
  "content": [
121
  {
122
+ "type": "image",
123
+ "source": {
124
+ "type": "base64",
125
+ "media_type": media_type,
126
+ "data": b64,
127
  },
128
  },
129
  {"type": "text", "text": question},
130
  ],
131
  }],
 
132
  )
133
+ return response.content[0].text
134
  except Exception as e:
135
+ return f"Vision error: {e}"
 
136
 
137
  def tool_run_python_file(self, task_id: str) -> str:
138
  """Download and execute Python file, return stdout."""
 
170
  else pd.read_excel(io.BytesIO(fb))
171
  )
172
  preview = df.to_string(max_rows=80, max_cols=20)
 
173
  return (
174
  f"SPREADSHEET DATA:\n{preview}\n\n"
175
  f"Answer the following about this data: {question}"
 
178
  return f"Excel read error: {e}"
179
 
180
  def tool_transcribe_audio(self, task_id: str) -> str:
181
+ """Transcribe audio using HF Whisper (free ASR endpoint)."""
182
  fb, ct = self._fetch_file(task_id)
183
  if not fb:
184
  return "No file found."
 
195
  f.write(fb)
196
  fname = f.name
197
 
198
+ if self.hf_client:
199
+ asr_client = InferenceClient(
200
+ model="openai/whisper-large-v3",
201
+ token=self.hf_token,
202
+ )
203
+ with open(fname, "rb") as audio_f:
204
+ result = asr_client.automatic_speech_recognition(audio_f)
205
+ return result.text if hasattr(result, "text") else str(result)
206
+ else:
207
+ return "No HF token available for audio transcription."
208
  except Exception as e:
209
  return f"Transcription error: {e}"
210
 
 
322
  )
323
  return f"Transcript error: {err}"
324
 
325
+ # ── Anthropic tool definitions ────────────────────────────────────────────
326
 
327
  TOOLS = [
328
  {
329
+ "name": "check_file",
330
+ "description": (
331
+ "ALWAYS call this first. Checks if a file is attached to the task. "
332
+ "Returns NO_FILE or the file type and which tool to use next."
333
+ ),
334
+ "input_schema": {
335
+ "type": "object",
336
+ "properties": {"task_id": {"type": "string"}},
337
+ "required": ["task_id"],
 
 
 
338
  },
339
  },
340
  {
341
+ "name": "analyse_image",
342
+ "description": (
343
+ "Analyse an image file attached to the task using vision. "
344
+ "Use for chess boards, diagrams, photos, screenshots."
345
+ ),
346
+ "input_schema": {
347
+ "type": "object",
348
+ "properties": {
349
+ "task_id": {"type": "string"},
350
+ "question": {
351
+ "type": "string",
352
+ "description": "What to find or answer from the image.",
 
 
 
353
  },
 
354
  },
355
+ "required": ["task_id", "question"],
356
  },
357
  },
358
  {
359
+ "name": "run_python_file",
360
+ "description": (
361
+ "Execute the Python file attached to the task and return its output. "
362
+ "The stdout IS the answer."
363
+ ),
364
+ "input_schema": {
365
+ "type": "object",
366
+ "properties": {"task_id": {"type": "string"}},
367
+ "required": ["task_id"],
 
 
 
368
  },
369
  },
370
  {
371
+ "name": "read_excel_file",
372
+ "description": "Read an Excel or CSV file and answer a question about its data.",
373
+ "input_schema": {
374
+ "type": "object",
375
+ "properties": {
376
+ "task_id": {"type": "string"},
377
+ "question": {"type": "string"},
 
 
 
 
378
  },
379
+ "required": ["task_id", "question"],
380
  },
381
  },
382
  {
383
+ "name": "transcribe_audio",
384
+ "description": (
385
+ "Transcribe an audio file using Whisper. "
386
+ "Use for voice memos, recordings, audio questions."
387
+ ),
388
+ "input_schema": {
389
+ "type": "object",
390
+ "properties": {"task_id": {"type": "string"}},
391
+ "required": ["task_id"],
 
 
 
392
  },
393
  },
394
  {
395
+ "name": "read_text_file",
396
+ "description": "Read a text or PDF file attached to the task.",
397
+ "input_schema": {
398
+ "type": "object",
399
+ "properties": {"task_id": {"type": "string"}},
400
+ "required": ["task_id"],
 
 
 
401
  },
402
  },
403
  {
404
+ "name": "youtube_transcript",
405
+ "description": (
406
+ "Fetch YouTube video transcript. "
407
+ "If cloud-blocked, use search_web instead."
408
+ ),
409
+ "input_schema": {
410
+ "type": "object",
411
+ "properties": {"video_url": {"type": "string"}},
412
+ "required": ["video_url"],
 
 
 
413
  },
414
  },
415
  {
416
+ "name": "search_web",
417
+ "description": "Search the web via DuckDuckGo. Returns top result snippets.",
418
+ "input_schema": {
419
+ "type": "object",
420
+ "properties": {"query": {"type": "string"}},
421
+ "required": ["query"],
 
 
 
422
  },
423
  },
424
  {
425
+ "name": "fetch_webpage",
426
+ "description": "Fetch and read the full text of any URL.",
427
+ "input_schema": {
428
+ "type": "object",
429
+ "properties": {"url": {"type": "string"}},
430
+ "required": ["url"],
 
 
 
431
  },
432
  },
433
  {
434
+ "name": "fetch_wikipedia",
435
+ "description": (
436
+ "Fetch a Wikipedia article by exact title via REST API. "
437
+ "Always prefer this over fetch_webpage for Wikipedia."
438
+ ),
439
+ "input_schema": {
440
+ "type": "object",
441
+ "properties": {"title": {"type": "string"}},
442
+ "required": ["title"],
 
 
 
443
  },
444
  },
445
  ]
 
472
  # ── system prompt ─────────────────────────────────────────────────────────
473
 
474
  SYSTEM = """You are a precise research agent solving GAIA benchmark tasks.
 
475
  MANDATORY WORKFLOW:
 
476
  STEP 1 β€” Call check_file(task_id) first for every task.
477
  β€’ NO_FILE β†’ go to STEP 2.
478
  β€’ image file β†’ call analyse_image(task_id, question).
 
481
  β€’ audio file β†’ call transcribe_audio(task_id), then answer from transcript.
482
  β€’ text/pdf file β†’ call read_text_file(task_id), then answer from content.
483
  NEVER return "NO_FILE" or tool status strings as your final answer.
 
484
  STEP 2 β€” Gather information.
485
  β€’ YouTube URL β†’ call youtube_transcript(url). If BLOCKED β†’ search_web.
486
  β€’ Wikipedia question β†’ fetch_wikipedia("Exact Article Title").
 
489
  https://chem.libretexts.org/Bookshelves/Introductory_Chemistry/Introductory_Chemistry_(LibreTexts)/02%3A_Measurement_and_Problem_Solving/2.E%3A_Measurement_and_Problem_Solving_(Exercises)
490
  β€’ Sports stats β†’ search_web then fetch_webpage for exact numbers.
491
  β€’ Any other question β†’ search_web, then fetch_webpage for details.
 
492
  STEP 3 β€” Try at least 2-3 different search queries before concluding.
493
  Never say "I was unable to find." Always use tools to find the answer.
 
494
  STEP 4 β€” Final answer: ONLY the value. No explanation. No preamble.
495
  Numbers: just digits. Names: just the name. Lists: comma-separated."""
496
 
 
500
  print(f"β–Ά Task {task_id[:8]}: {question[:80]}")
501
 
502
  messages = [
 
503
  {
504
  "role": "user",
505
  "content": f"task_id: {task_id}\n\nTask: {question}",
 
514
 
515
  for _round in range(10):
516
  try:
517
+ resp = self.anthropic_client.messages.create(
518
+ model=self.model,
 
 
519
  max_tokens=1500,
520
+ system=self.SYSTEM,
521
+ tools=self.TOOLS,
522
+ messages=messages,
523
  )
524
  except Exception as e:
525
+ print(f" Anthropic API error: {e}")
526
+ return "Error."
527
+
528
+ # Check stop reason
529
+ stop_reason = resp.stop_reason
530
+
531
+ # Collect text and tool use blocks
532
+ tool_uses = [b for b in resp.content if b.type == "tool_use"]
533
+ text_blocks = [b for b in resp.content if b.type == "text"]
534
+
535
+ # Append assistant message
536
+ messages.append({"role": "assistant", "content": resp.content})
537
+
538
+ if stop_reason == "end_turn" or not tool_uses:
539
+ # Final answer
540
+ answer = text_blocks[0].text.strip() if text_blocks else ""
 
 
 
541
  if any(b in answer.lower() for b in bad_phrases):
 
542
  messages.append({
543
  "role": "user",
544
  "content": (
 
549
  continue
550
  return answer
551
 
552
+ # Execute tool calls and collect results
553
+ tool_results = []
554
+ for tb in tool_uses:
555
+ fn = tb.name
556
+ args = tb.input if isinstance(tb.input, dict) else {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
557
  result = self._dispatch(fn, args, task_id, question)
558
  print(f" {fn} β†’ {str(result)[:80]}")
559
+ tool_results.append({
560
+ "type": "tool_result",
561
+ "tool_use_id": tb.id,
 
562
  "content": result or "Empty result.",
563
  })
564
 
565
+ messages.append({"role": "user", "content": tool_results})
566
+
567
  # Force final answer after max rounds
568
  try:
569
  messages.append({
570
  "role": "user",
571
  "content": "Final answer only β€” just the value, no explanation.",
572
  })
573
+ resp = self.anthropic_client.messages.create(
574
+ model=self.model,
575
+ max_tokens=100,
576
+ system=self.SYSTEM,
577
+ messages=messages,
578
  )
579
+ text_blocks = [b for b in resp.content if b.type == "text"]
580
+ return text_blocks[0].text.strip() if text_blocks else "Error."
581
  except Exception:
582
  return "Error."
583
 
 
647
 
648
 
649
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
650
+ gr.Markdown("# πŸ€– GAIA Agent β€” Claude Sonnet")
651
  gr.Markdown(
652
+ f"**LLM:** `claude-sonnet-4-20250514` (Anthropic API) \n"
653
+ "**Vision:** Claude native vision \n"
654
+ "**ASR:** `openai/whisper-large-v3` (HF)"
655
  )
656
  gr.LoginButton()
657
  run_button = gr.Button("πŸš€ Run Evaluation & Submit", variant="primary")