laiking commited on
Commit
498d072
·
1 Parent(s): 5cc5259
Files changed (4) hide show
  1. app.py +50 -13
  2. notebooks/llamaindex.ipynb +658 -127
  3. pyproject.toml +43 -0
  4. uv.lock +0 -0
app.py CHANGED
@@ -8,33 +8,33 @@ import json
8
  import warnings
9
  import mwclient
10
  from llama_index.core.tools import FunctionTool
11
- # from llama_index.llms.mistralai import MistralAI
12
- # from llama_index.llms.google_genai import GoogleGenAI
13
  from llama_index.llms.openrouter import OpenRouter
14
  from llama_index.core.agent.workflow import ReActAgent
15
  from llama_index.readers.web import BeautifulSoupWebReader
16
  from llama_index.tools.tavily_research import TavilyToolSpec
17
- from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock, AudioBlock
18
  from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
19
  from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211
 
 
 
 
 
 
 
20
  # Disable pydantic deprecation warnings
21
  warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)
22
  warnings.filterwarnings("ignore", category=PydanticDeprecatedSince211)
 
23
  # (Keep Constants as is)
24
  # --- Constants ---
25
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
26
 
27
  # --- Basic Agent Definition ---
28
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
29
 
30
- # Load LLM API (Google GenAI)
31
- # gemini_2_5_flash = GoogleGenAI(model="gemini-2.5-flash-preview-05-20") # Audio, images, vidéos et texte -> Texte
32
- # magistral_sm = MistralAI(model="magistral-small-2506") # reasoning
33
- # mistral_sm = MistralAI(model="mistral-small-latest") # general purpose + image understanding capabilities
34
  nemotron_super = OpenRouter(model="nvidia/llama-3.3-nemotron-super-49b-v1:free") # advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks
35
 
36
-
37
- # Tools
38
 
39
  def get_page(page_query:str):
40
  """Send a query to wikipedia and return the text of the page found if it is found, else return an empty string."""
@@ -83,7 +83,38 @@ tools = [
83
 
84
  GAIA_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
85
 
86
- def run_and_submit_all( profile: gr.OAuthProfile | None):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  """
88
  Fetches all questions, runs the BasicAgent on them, submits all answers,
89
  and displays the results.
@@ -109,6 +140,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
109
  llm=nemotron_super,
110
  tools=tools,
111
  system_prompt="detailed thinking off",
 
 
112
  )
113
  except Exception as e:
114
  print(f"Error instantiating agent: {e}")
@@ -118,7 +151,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
118
  print(agent_code)
119
 
120
  # 2. Fetch Questions
121
- with open("data/gaia-tasks.json","r") as f:
 
122
  try:
123
  questions_data = json.load(f)
124
  except json.JSONDecodeError as e:
@@ -134,13 +168,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
134
  for item in questions_data:
135
  task_id = item.get("task_id")
136
  question_text = item.get("question")
 
 
137
  if not task_id or question_text is None:
138
  print(f"Skipping item with missing task_id or question: {item}")
139
  continue
 
140
  try:
141
  prompt = f"{GAIA_PROMPT}\nQuestion: {question_text}"
142
  message = ChatMessage(role="user",content=prompt) # TODO: handle files/multimodal inputs
143
- agent_answer = agent.run(user_msg=message)
144
  # Parsing agents answer
145
  pattern = r'(?:final\s+)?answer\s*:\s*(.*)'
146
  match = re.search(pattern, agent_answer.response.blocks[-1].text, re.IGNORECASE)
 
8
  import warnings
9
  import mwclient
10
  from llama_index.core.tools import FunctionTool
 
 
11
  from llama_index.llms.openrouter import OpenRouter
12
  from llama_index.core.agent.workflow import ReActAgent
13
  from llama_index.readers.web import BeautifulSoupWebReader
14
  from llama_index.tools.tavily_research import TavilyToolSpec
15
+ from llama_index.core.llms import ChatMessage
16
  from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
17
  from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211
18
+
19
+ # Get environment variables for local testing
20
+ file_path = os.path.dirname(os.path.abspath(__file__))
21
+ environment_file = os.path.join(file_path, ".env")
22
+ if os.path.exists(environment_file): # Load environment variables from .env file
23
+ from dotenv import load_dotenv
24
+ load_dotenv(environment_file)
25
  # Disable pydantic deprecation warnings
26
  warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)
27
  warnings.filterwarnings("ignore", category=PydanticDeprecatedSince211)
28
+
29
  # (Keep Constants as is)
30
  # --- Constants ---
31
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
32
 
33
  # --- Basic Agent Definition ---
 
34
 
 
 
 
 
35
  nemotron_super = OpenRouter(model="nvidia/llama-3.3-nemotron-super-49b-v1:free") # advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks
36
 
37
+ # --- Tools ---
 
38
 
39
  def get_page(page_query:str):
40
  """Send a query to wikipedia and return the text of the page found if it is found, else return an empty string."""
 
83
 
84
  GAIA_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
85
 
86
+ def extract_final_answer(response_text: str) -> str:
87
+ """Extract the final answer from agent response text."""
88
+ if not response_text:
89
+ return "ERROR: Empty response"
90
+
91
+ # Try multiple patterns to extract final answer
92
+ patterns = [
93
+ r'(?:final\s+)?answer\s*:\s*(.*?)(?:\n|$)',
94
+ r'answer\s*:\s*(.*?)(?:\n|$)',
95
+ r'final\s*:\s*(.*?)(?:\n|$)',
96
+ ]
97
+
98
+ for pattern in patterns:
99
+ match = re.search(pattern, response_text, re.IGNORECASE | re.DOTALL)
100
+ if match:
101
+ answer = match.group(1).strip()
102
+ # Clean up the answer
103
+ answer = re.sub(r'\s+', ' ', answer) # Normalize whitespace
104
+ answer = answer.replace('```', '').strip() # Remove code blocks
105
+ if answer and len(answer) < 500: # Reasonable length check
106
+ return answer
107
+
108
+ # Fallback: return last line if no pattern matches
109
+ lines = response_text.strip().split('\n')
110
+ if lines:
111
+ last_line = lines[-1].strip()
112
+ if last_line and len(last_line) < 200:
113
+ return last_line
114
+
115
+ return "No clear final answer found"
116
+
117
+ async def run_and_submit_all( profile: gr.OAuthProfile | None):
118
  """
119
  Fetches all questions, runs the BasicAgent on them, submits all answers,
120
  and displays the results.
 
140
  llm=nemotron_super,
141
  tools=tools,
142
  system_prompt="detailed thinking off",
143
+ max_iterations=10,
144
+ verbose=True,
145
  )
146
  except Exception as e:
147
  print(f"Error instantiating agent: {e}")
 
151
  print(agent_code)
152
 
153
  # 2. Fetch Questions
154
+ data_path = os.path.join(file_path, "data", "gaia-tasks.json")
155
+ with open(data_path,"r") as f:
156
  try:
157
  questions_data = json.load(f)
158
  except json.JSONDecodeError as e:
 
168
  for item in questions_data:
169
  task_id = item.get("task_id")
170
  question_text = item.get("question")
171
+ file_name = item.get("file_name", "")
172
+
173
  if not task_id or question_text is None:
174
  print(f"Skipping item with missing task_id or question: {item}")
175
  continue
176
+
177
  try:
178
  prompt = f"{GAIA_PROMPT}\nQuestion: {question_text}"
179
  message = ChatMessage(role="user",content=prompt) # TODO: handle files/multimodal inputs
180
+ agent_answer = await agent.run(user_msg=message)
181
  # Parsing agents answer
182
  pattern = r'(?:final\s+)?answer\s*:\s*(.*)'
183
  match = re.search(pattern, agent_answer.response.blocks[-1].text, re.IGNORECASE)
notebooks/llamaindex.ipynb CHANGED
@@ -18,7 +18,7 @@
18
  },
19
  {
20
  "cell_type": "code",
21
- "execution_count": null,
22
  "id": "a7101ff8",
23
  "metadata": {},
24
  "outputs": [
@@ -26,7 +26,7 @@
26
  "name": "stderr",
27
  "output_type": "stream",
28
  "text": [
29
- "/home/laiking/code/learning/agents/.venv/lib/python3.12/site-packages/pydantic/_internal/_config.py:323: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/\n",
30
  " warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)\n"
31
  ]
32
  }
@@ -38,20 +38,27 @@
38
  "import warnings\n",
39
  "import requests\n",
40
  "import mwclient\n",
 
 
 
 
41
  "from dotenv import load_dotenv\n",
 
 
 
42
  "\n",
43
  "from llama_index.core.tools import FunctionTool\n",
44
  "from llama_index.llms.mistralai import MistralAI\n",
45
  "from llama_index.llms.openrouter import OpenRouter\n",
46
  "from llama_index.llms.google_genai import GoogleGenAI\n",
47
- "from llama_index.core.agent.workflow import ReActAgent\n",
48
- "\n",
49
  "from llama_index.readers.web import BeautifulSoupWebReader\n",
50
  "from llama_index.tools.tavily_research import TavilyToolSpec\n",
51
- "from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock, AudioBlock\n",
52
  "from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool\n",
 
 
53
  "\n",
54
  "from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211\n",
 
55
  "# Load environment variables from .env file\n",
56
  "load_dotenv()\n",
57
  "# Disable pydantic deprecation warnings\n",
@@ -61,7 +68,7 @@
61
  },
62
  {
63
  "cell_type": "code",
64
- "execution_count": 3,
65
  "id": "a4bfbbc9",
66
  "metadata": {},
67
  "outputs": [],
@@ -89,46 +96,45 @@
89
  " raise Exception(f\"Failed to fetch task file for task_id {task_id}: {response.text}\")\n",
90
  " \n",
91
  " @staticmethod\n",
92
- " def submit_answer(task_id, answer):\n",
93
- " data = {\n",
94
- " \"task_id\": task_id,\n",
95
- " \"answer\": answer\n",
96
- " }\n",
97
- " response = requests.post(HFAgentsCourseAPI.API_URL + \"/submit-answer\", json=data)\n",
98
- " return response.json()"
 
 
 
 
 
 
 
 
99
  ]
100
  },
101
  {
102
  "cell_type": "code",
103
- "execution_count": 4,
104
  "id": "ab28e8d7",
105
  "metadata": {},
106
  "outputs": [],
107
  "source": [
108
  "# data loading and downloading (if not)\n",
109
- "data_dir = \"/home/laiking/code/learning/agents/agents-course-project/data/\"\n",
110
- "gaia_filename = \"gaia-val-20.json\"\n",
111
- "\n",
112
  "# download questions\n",
113
- "if not os.path.exists(data_dir + gaia_filename):\n",
114
  " data = HFAgentsCourseAPI.get_all_questions()\n",
115
- " with open(data_dir + gaia_filename, \"w\") as f:\n",
116
  " json.dump(data, f, indent=4)\n",
117
  "else:\n",
118
- " with open(data_dir + gaia_filename, \"r\") as f:\n",
119
- " data = json.load(f)\n",
120
- "\n",
121
- "# download task files\n",
122
- "for el in data:\n",
123
- " if el[\"file_name\"] and not os.path.exists(data_dir + el[\"file_name\"]):\n",
124
- " file = HFAgentsCourseAPI.get_task_file(el[\"task_id\"])\n",
125
- " with open(data_dir + el[\"file_name\"], \"wb\") as f:\n",
126
- " f.write(file) "
127
  ]
128
  },
129
  {
130
  "cell_type": "code",
131
- "execution_count": 5,
132
  "id": "94f848b8",
133
  "metadata": {},
134
  "outputs": [],
@@ -165,12 +171,12 @@
165
  },
166
  {
167
  "cell_type": "code",
168
- "execution_count": 9,
169
  "id": "2baeb38b",
170
  "metadata": {},
171
  "outputs": [],
172
  "source": [
173
- "# 30% tools\n",
174
  "\n",
175
  "def get_page(page_query:str):\n",
176
  " \"\"\"Send a query to wikipedia and return the text of the page found if it is found, else return an empty string.\"\"\"\n",
@@ -184,10 +190,98 @@
184
  " \"\"\"Reverse a string.\"\"\"\n",
185
  " return s[::-1]\n",
186
  "\n",
187
- "async def reverse_string_async(s: str) -> str:\n",
188
- " \"\"\"Asynchronous version of reverse_string.\"\"\"\n",
189
- " return s[::-1]\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  "\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  "wiki_page_tool = FunctionTool.from_defaults(\n",
192
  " get_page,\n",
193
  " name=\"WikipediaTool\",\n",
@@ -198,7 +292,36 @@
198
  " reverse_string,\n",
199
  " name=\"ReverseStringTool\",\n",
200
  " description=\"Reverse a string and return it.\",\n",
201
- " async_fn=reverse_string_async\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  ")\n",
203
  "\n",
204
  "tavily_tools = TavilyToolSpec(\n",
@@ -211,161 +334,569 @@
211
  " description=\"A tool for reading web pages. Provide a URL to read the content of the page.\",\n",
212
  ")\n",
213
  "\n",
 
214
  "tools = [\n",
215
  " wiki_page_tool,\n",
 
216
  " reverse_string_tool,\n",
 
 
 
 
217
  " web_page_reader_tool,\n",
218
  "] + tavily_tools"
219
  ]
220
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  {
222
  "cell_type": "code",
223
- "execution_count": null,
224
- "id": "d298a6b6",
225
  "metadata": {},
226
  "outputs": [],
227
  "source": [
228
- "GAIA_PROMPT = \"\"\"You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\"\"\"\n",
 
 
 
 
 
 
 
 
 
 
 
229
  "\n",
230
- "main_agent = ReActAgent(\n",
231
- " name=\"Gaia Agent\",\n",
232
- " description=\"General AI assistant\",\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  " llm=nemotron_super,\n",
234
  " tools=tools,\n",
235
  " system_prompt=\"detailed thinking off\",\n",
236
  ")\n",
237
  "\n",
238
- "multimodal_agent = ReActAgent(\n",
239
- " name=\"Gaia Multimodal Agent\",\n",
240
- " description=\"General AI assistant with multimodal capabilities\",\n",
241
- " llm=gemini_2_5_flash,\n",
242
- " system_prompt=GAIA_PROMPT,\n",
 
 
243
  ")\n",
244
  "\n",
245
- "reasoning_agent = ReActAgent(\n",
246
- " name=\"Gaia Reasoning Agent\",\n",
247
- " description=\"General AI assistant with advanced reasoning capabilities\",\n",
 
248
  " llm=nemotron_super,\n",
249
- " system_prompt=\"detailed thinking on\",\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  ")"
251
  ]
252
  },
253
  {
254
- "cell_type": "markdown",
255
- "id": "e73306e5",
 
256
  "metadata": {},
 
257
  "source": [
258
- "## Testing on selected GAIA example"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  ]
260
  },
261
  {
262
  "cell_type": "code",
263
- "execution_count": 16,
264
- "id": "f2629455",
265
  "metadata": {},
266
  "outputs": [
267
  {
268
  "name": "stdout",
269
  "output_type": "stream",
270
  "text": [
271
- "{'task_id': '305ac316-eef6-4446-960a-92d80d542f82', 'submitted_answer': 'Wojciech'}\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
  ]
273
  }
274
  ],
275
  "source": [
276
- "example = data[10]\n",
277
- "prompt = f\"{GAIA_PROMPT}\\nQuestion: {example['question']}\"\n",
278
- "message = ChatMessage(\n",
279
- " role=\"user\",\n",
280
- " content=prompt,\n",
281
- ")\n",
282
- "agent_answer = await main_agent.run(user_msg=message)\n",
283
- "pattern = r'(?:final\\s+)?answer\\s*:\\s*(.*)'\n",
284
- "match = re.search(pattern, agent_answer.response.blocks[-1].text, re.IGNORECASE)\n",
285
- "print({\n",
286
- " \"task_id\": example[\"task_id\"],\n",
287
- " \"submitted_answer\": match.group(1) if match else \"No final answer found\",\n",
288
- "})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
289
  ]
290
  },
291
  {
292
- "cell_type": "markdown",
293
- "id": "81fccb87",
 
294
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
295
  "source": [
296
- "## Testing on all GAIA"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  ]
298
  },
299
  {
300
  "cell_type": "code",
301
- "execution_count": null,
302
- "id": "e9b3910c",
303
  "metadata": {},
304
  "outputs": [
305
  {
306
  "name": "stdout",
307
  "output_type": "stream",
308
  "text": [
309
- "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish\n",
310
- "your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].\n",
311
- "YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of\n",
312
- "numbers and/or strings.\n",
313
- "If you are asked for a number, don’t use comma to write your number neither use units such as $ or percent\n",
314
- "sign unless specified otherwise.\n",
315
- "If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the digits in\n",
316
- "plain text unless specified otherwise.\n",
317
- "If you are asked for a comma separated list, apply the above rules depending of whether the element to be put\n",
318
- "in the list is a number or a string. \n",
319
- "Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.\n",
320
- "Agent response: Thought: After reviewing the provided Wikipedia page for Mercedes Sosa, I've located the discography section which lists her studio albums. To determine how many were published between 2000 and 2009 (inclusive), I'll examine the \"Studio albums\" table for release years within that range.\n",
321
- "\n",
322
- "Action: None (Information found within provided observation)\n",
323
- "\n",
324
- "Observation Analysis:\n",
325
- "- **2005**: Corazón Libre\n",
326
- "- **2009**: Cantora 1, Cantora 2\n",
327
  "\n",
328
- "Thought: Based on the discography provided, Mercedes Sosa released 3 studio albums between 2000 and 2009.\n",
329
- "\n",
330
- "Answer: 3\n"
 
 
331
  ]
332
  }
333
  ],
334
  "source": [
335
- "DATA_DIR = \"agents-course-project/data\"\n",
336
- "answers = []\n",
337
- "for example in data:\n",
338
- " # Initial prompt \n",
339
- " message = ChatMessage(\n",
340
- " role=\"user\",\n",
341
- " blocks=TextBlock(text=f\"{GAIA_PROMPT}\\nQuestion: {example['question']}\")\n",
342
- " )\n",
343
  " \n",
344
- " # Add file handling\n",
345
- " if example[\"file_name\"]:\n",
346
- " file_path = os.path.join(DATA_DIR, example[\"file_name\"])\n",
347
- " if example[\"file_name\"].endswith(\".mp3\"):\n",
348
- " file_block = AudioBlock(path=file_path, format=\"mp3\")\n",
349
- " elif example[\"file_name\"].endswith(\".py\"):\n",
350
- " print(\"Python file found, skipping...\")\n",
351
- " continue\n",
352
- " elif example[\"file_name\"].endswith(\".png\"):\n",
353
- " print(\"Image file found, skipping...\")\n",
354
- " continue\n",
355
- " elif example[\"file_name\"].endswith(\".xlsx\"):\n",
356
- " print(\"Excel file found, skipping...\")\n",
357
- " continue\n",
358
  " \n",
359
- " agent_answer = await main_agent.run(user_msg=message)\n",
360
- " print(f\"Agent response: {agent_answer}\")\n",
361
- " # Parsing final answer from agent response\n",
362
- " pattern = r'(?:final\\s+)?answer\\s*:\\s*(.*)'\n",
363
- " match = re.search(pattern, agent_answer.response.blocks[-1].text, re.IGNORECASE)\n",
364
- " answers.append({\n",
365
- " \"task_id\": example[\"task_id\"],\n",
366
- " \"submitted_answer\": agent_answer\n",
367
- " })\n",
368
- " break"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  ]
370
  }
371
  ],
 
18
  },
19
  {
20
  "cell_type": "code",
21
+ "execution_count": 3,
22
  "id": "a7101ff8",
23
  "metadata": {},
24
  "outputs": [
 
26
  "name": "stderr",
27
  "output_type": "stream",
28
  "text": [
29
+ "/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/pydantic/_internal/_config.py:323: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/\n",
30
  " warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)\n"
31
  ]
32
  }
 
38
  "import warnings\n",
39
  "import requests\n",
40
  "import mwclient\n",
41
+ "import asyncio\n",
42
+ "import pandas as pd\n",
43
+ "import subprocess\n",
44
+ "import tempfile\n",
45
  "from dotenv import load_dotenv\n",
46
+ "from typing import List, Dict, Optional\n",
47
+ "from urllib.parse import urlparse, parse_qs\n",
48
+ "from youtube_transcript_api import YouTubeTranscriptApi\n",
49
  "\n",
50
  "from llama_index.core.tools import FunctionTool\n",
51
  "from llama_index.llms.mistralai import MistralAI\n",
52
  "from llama_index.llms.openrouter import OpenRouter\n",
53
  "from llama_index.llms.google_genai import GoogleGenAI\n",
 
 
54
  "from llama_index.readers.web import BeautifulSoupWebReader\n",
55
  "from llama_index.tools.tavily_research import TavilyToolSpec\n",
 
56
  "from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool\n",
57
+ "from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock, AudioBlock\n",
58
+ "from llama_index.core.agent.workflow import ReActAgent,FunctionAgent, AgentWorkflow\n",
59
  "\n",
60
  "from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211\n",
61
+ "\n",
62
  "# Load environment variables from .env file\n",
63
  "load_dotenv()\n",
64
  "# Disable pydantic deprecation warnings\n",
 
68
  },
69
  {
70
  "cell_type": "code",
71
+ "execution_count": 4,
72
  "id": "a4bfbbc9",
73
  "metadata": {},
74
  "outputs": [],
 
96
  " raise Exception(f\"Failed to fetch task file for task_id {task_id}: {response.text}\")\n",
97
  " \n",
98
  " @staticmethod\n",
99
+ " def submit_agent_answers(username: str,agent_code: str,answers: List[Dict[str, str]]) -> Optional[Dict]:\n",
100
+ " \"\"\"answers keys are task_id and submitted_answer\"\"\"\n",
101
+ " url = f\"{HFAgentsCourseAPI.API_URL}/submit\"\n",
102
+ " payload = {\"username\": username,\"agent_code\": agent_code,\"answers\": answers}\n",
103
+ " headers = {\"Content-Type\": \"application/json\"}\n",
104
+ " try:\n",
105
+ " response = requests.post(url,json.dumps(payload),headers=headers,timeout=30)\n",
106
+ " response.raise_for_status()\n",
107
+ " return response.json()\n",
108
+ " except requests.exceptions.RequestException as e:\n",
109
+ " print(f\"Erreur lors de la requête: {e}\")\n",
110
+ " return None\n",
111
+ " except json.JSONDecodeError as e:\n",
112
+ " print(f\"Erreur lors du décodage JSON: {e}\")\n",
113
+ " return None"
114
  ]
115
  },
116
  {
117
  "cell_type": "code",
118
+ "execution_count": 11,
119
  "id": "ab28e8d7",
120
  "metadata": {},
121
  "outputs": [],
122
  "source": [
123
  "# data loading and downloading (if not)\n",
124
+ "filepath = \"/home/laiking/code/learning/agents-course-pj/data/gaia-tasks.json\"\n",
 
 
125
  "# download questions\n",
126
+ "if not os.path.exists(filepath):\n",
127
  " data = HFAgentsCourseAPI.get_all_questions()\n",
128
+ " with open(filepath, \"w\") as f:\n",
129
  " json.dump(data, f, indent=4)\n",
130
  "else:\n",
131
+ " with open(filepath, \"r\") as f:\n",
132
+ " data = json.load(f)"
 
 
 
 
 
 
 
133
  ]
134
  },
135
  {
136
  "cell_type": "code",
137
+ "execution_count": 6,
138
  "id": "94f848b8",
139
  "metadata": {},
140
  "outputs": [],
 
171
  },
172
  {
173
  "cell_type": "code",
174
+ "execution_count": 7,
175
  "id": "2baeb38b",
176
  "metadata": {},
177
  "outputs": [],
178
  "source": [
179
+ "# Tools\n",
180
  "\n",
181
  "def get_page(page_query:str):\n",
182
  " \"\"\"Send a query to wikipedia and return the text of the page found if it is found, else return an empty string.\"\"\"\n",
 
190
  " \"\"\"Reverse a string.\"\"\"\n",
191
  " return s[::-1]\n",
192
  "\n",
193
+ "def get_youtube_transcript(video_url: str) -> str:\n",
194
+ " \"\"\"Get the transcript/subtitles of a YouTube video.\"\"\"\n",
195
+ " try:\n",
196
+ " # Extract video ID from URL\n",
197
+ " parsed_url = urlparse(video_url)\n",
198
+ " if parsed_url.hostname == 'youtu.be':\n",
199
+ " video_id = parsed_url.path[1:]\n",
200
+ " elif parsed_url.hostname in ('www.youtube.com', 'youtube.com'):\n",
201
+ " if 'watch' in parsed_url.path:\n",
202
+ " video_id = parse_qs(parsed_url.query)['v'][0]\n",
203
+ " elif 'embed' in parsed_url.path:\n",
204
+ " video_id = parsed_url.path.split('/')[-1]\n",
205
+ " else:\n",
206
+ " return \"Invalid YouTube URL\"\n",
207
+ " \n",
208
+ " # Get transcript\n",
209
+ " transcript_list = YouTubeTranscriptApi.get_transcript(video_id)\n",
210
+ " transcript_text = \" \".join([item['text'] for item in transcript_list])\n",
211
+ " return transcript_text\n",
212
+ " except Exception as e:\n",
213
+ " return f\"Error getting transcript: {str(e)}\"\n",
214
+ "\n",
215
+ "def execute_python_code(code: str) -> str:\n",
216
+ " \"\"\"Execute Python code safely and return the output.\"\"\"\n",
217
+ " try:\n",
218
+ " # Create a temporary file\n",
219
+ " with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:\n",
220
+ " f.write(code)\n",
221
+ " temp_file = f.name\n",
222
+ " # Execute the code\n",
223
+ " result = subprocess.run(['python', temp_file], \n",
224
+ " capture_output=True, text=True, timeout=30)\n",
225
+ " # Clean up\n",
226
+ " os.unlink(temp_file)\n",
227
+ " if result.returncode == 0:\n",
228
+ " return f\"Output: {result.stdout}\"\n",
229
+ " else:\n",
230
+ " return f\"Error: {result.stderr}\"\n",
231
+ " except Exception as e:\n",
232
+ " return f\"Execution error: {str(e)}\"\n",
233
+ "\n",
234
+ "def read_excel_file(file_path: str) -> str:\n",
235
+ " \"\"\"Read an Excel file and return its content as text.\"\"\"\n",
236
+ " try:\n",
237
+ " # Read all sheets\n",
238
+ " excel_file = pd.ExcelFile(file_path)\n",
239
+ " result = []\n",
240
+ " \n",
241
+ " for sheet_name in excel_file.sheet_names:\n",
242
+ " df = pd.read_excel(file_path, sheet_name=sheet_name)\n",
243
+ " result.append(f\"Sheet: {sheet_name}\")\n",
244
+ " result.append(df.to_string())\n",
245
+ " result.append(\"\\n\" + \"=\"*50 + \"\\n\")\n",
246
+ " \n",
247
+ " return \"\\n\".join(result)\n",
248
+ " except Exception as e:\n",
249
+ " return f\"Error reading Excel file: {str(e)}\"\n",
250
  "\n",
251
+ "def get_task_file_content(task_id: str) -> str:\n",
252
+ " \"\"\"Get the content of a task file using the HF API.\"\"\"\n",
253
+ " try:\n",
254
+ " file_content = HFAgentsCourseAPI.get_task_file(task_id)\n",
255
+ " # For text files, decode and return content\n",
256
+ " if isinstance(file_content, bytes):\n",
257
+ " try:\n",
258
+ " return file_content.decode('utf-8')\n",
259
+ " except UnicodeDecodeError:\n",
260
+ " return f\"Binary file content (size: {len(file_content)} bytes)\"\n",
261
+ " return str(file_content)\n",
262
+ " except Exception as e:\n",
263
+ " return f\"Error getting task file: {str(e)}\"\n",
264
+ "\n",
265
+ "def search_wikipedia_articles(query: str, limit: int = 5) -> str:\n",
266
+ " \"\"\"Search for Wikipedia articles and return summaries.\"\"\"\n",
267
+ " try:\n",
268
+ " site = mwclient.Site('en.wikipedia.org')\n",
269
+ " search_results = list(site.search(query, limit=limit))\n",
270
+ " \n",
271
+ " results = []\n",
272
+ " for page_info in search_results:\n",
273
+ " page = site.pages[page_info['title']]\n",
274
+ " if page.exists:\n",
275
+ " # Get first paragraph as summary\n",
276
+ " content = page.text()\n",
277
+ " first_para = content.split('\\n\\n')[0] if content else \"No content\"\n",
278
+ " results.append(f\"Title: {page_info['title']}\\nSummary: {first_para[:500]}...\")\n",
279
+ " \n",
280
+ " return \"\\n\\n\".join(results) if results else \"No results found\"\n",
281
+ " except Exception as e:\n",
282
+ " return f\"Error searching Wikipedia: {str(e)}\"\n",
283
+ "\n",
284
+ "# Create tools\n",
285
  "wiki_page_tool = FunctionTool.from_defaults(\n",
286
  " get_page,\n",
287
  " name=\"WikipediaTool\",\n",
 
292
  " reverse_string,\n",
293
  " name=\"ReverseStringTool\",\n",
294
  " description=\"Reverse a string and return it.\",\n",
295
+ ")\n",
296
+ "\n",
297
+ "youtube_transcript_tool = FunctionTool.from_defaults(\n",
298
+ " get_youtube_transcript,\n",
299
+ " name=\"YouTubeTranscriptTool\",\n",
300
+ " description=\"Get the transcript/subtitles of a YouTube video. Provide the full YouTube URL.\",\n",
301
+ ")\n",
302
+ "\n",
303
+ "python_executor_tool = FunctionTool.from_defaults(\n",
304
+ " execute_python_code,\n",
305
+ " name=\"PythonExecutorTool\",\n",
306
+ " description=\"Execute Python code and return the output. Use this for running Python scripts or code analysis.\",\n",
307
+ ")\n",
308
+ "\n",
309
+ "excel_reader_tool = FunctionTool.from_defaults(\n",
310
+ " read_excel_file,\n",
311
+ " name=\"ExcelReaderTool\",\n",
312
+ " description=\"Read an Excel file and return its content. Provide the full file path.\",\n",
313
+ ")\n",
314
+ "\n",
315
+ "task_file_tool = FunctionTool.from_defaults(\n",
316
+ " get_task_file_content,\n",
317
+ " name=\"TaskFileTool\",\n",
318
+ " description=\"Get the content of a task file using the task ID.\",\n",
319
+ ")\n",
320
+ "\n",
321
+ "wiki_search_tool = FunctionTool.from_defaults(\n",
322
+ " search_wikipedia_articles,\n",
323
+ " name=\"WikipediaSearchTool\",\n",
324
+ " description=\"Search for Wikipedia articles and return summaries. Useful for finding relevant articles.\",\n",
325
  ")\n",
326
  "\n",
327
  "tavily_tools = TavilyToolSpec(\n",
 
334
  " description=\"A tool for reading web pages. Provide a URL to read the content of the page.\",\n",
335
  ")\n",
336
  "\n",
337
+ "# Enhanced tool list\n",
338
  "tools = [\n",
339
  " wiki_page_tool,\n",
340
+ " wiki_search_tool,\n",
341
  " reverse_string_tool,\n",
342
+ " youtube_transcript_tool,\n",
343
+ " python_executor_tool,\n",
344
+ " excel_reader_tool,\n",
345
+ " task_file_tool,\n",
346
  " web_page_reader_tool,\n",
347
  "] + tavily_tools"
348
  ]
349
  },
350
+ {
351
+ "cell_type": "markdown",
352
+ "id": "87726303",
353
+ "metadata": {},
354
+ "source": [
355
+ "## Multi-Agent Workflow with Multimodal Inputs\n",
356
+ "\n",
357
+ "In this workflow, we have:\n",
358
+ "1. **ManagerAgent**: Main agent that routes tasks but only handles text\n",
359
+ "2. **MultimodalAgent**: Specialized agent that can process images, audio, videos\n",
360
+ "3. **ReasoningAgent**: Agent for complex reasoning\n",
361
+ "\n",
362
+ "The ManagerAgent detects if the input contains multimodal elements and automatically delegates to the appropriate agent."
363
+ ]
364
+ },
365
  {
366
  "cell_type": "code",
367
+ "execution_count": 8,
368
+ "id": "96f7fa7a",
369
  "metadata": {},
370
  "outputs": [],
371
  "source": [
372
+ "def has_multimodal_content(message: ChatMessage) -> bool:\n",
373
+ " \"\"\"Detects if a message contains multimodal content (images, audio, video)\"\"\"\n",
374
+ " if hasattr(message, 'content'):\n",
375
+ " # If content is a list of blocks\n",
376
+ " if isinstance(message.content, list):\n",
377
+ " for block in message.content:\n",
378
+ " if isinstance(block, (ImageBlock, AudioBlock)):\n",
379
+ " return True\n",
380
+ " # If content is a single block\n",
381
+ " elif isinstance(message.content, (ImageBlock, AudioBlock)):\n",
382
+ " return True\n",
383
+ " return False\n",
384
  "\n",
385
+ "def extract_text_from_message(message: ChatMessage) -> str:\n",
386
+ " \"\"\"Extracts only the text from a multimodal message\"\"\"\n",
387
+ " text_parts = []\n",
388
+ " if hasattr(message, 'content'):\n",
389
+ " if isinstance(message.content, list):\n",
390
+ " for block in message.content:\n",
391
+ " if isinstance(block, TextBlock):\n",
392
+ " text_parts.append(block.text)\n",
393
+ " elif isinstance(message.content, TextBlock):\n",
394
+ " text_parts.append(message.content.text)\n",
395
+ " elif isinstance(message.content, str):\n",
396
+ " text_parts.append(message.content)\n",
397
+ " return \"\\n\".join(text_parts)"
398
+ ]
399
+ },
400
+ {
401
+ "cell_type": "code",
402
+ "execution_count": 9,
403
+ "id": "0ee22893",
404
+ "metadata": {},
405
+ "outputs": [],
406
+ "source": [
407
+ "# Configuration of agents for multimodal workflow\n",
408
+ "\n",
409
+ "# Specialized system prompts for different task types\n",
410
+ "GAIA_PROMPT = \"You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\"\n",
411
+ "MULTIMODAL_PROMPT = \"You are a multimodal specialist. You can process images, audio, and video content. For chess positions, analyze the board carefully. For audio, transcribe and analyze the content. Be precise in your analysis.\"\n",
412
+ "MATH_PROMPT = \"You are a mathematical reasoning specialist. You excel at logical analysis, mathematical operations, and structured problem solving. Break down complex problems step by step.\"\n",
413
+ "\n",
414
+ "# Main coordinator agent (text only, with all tools)\n",
415
+ "coordinator_agent = ReActAgent(\n",
416
+ " name=\"CoordinatorAgent\",\n",
417
+ " description=\"Main coordinator that handles general tasks and delegates to specialists\",\n",
418
  " llm=nemotron_super,\n",
419
  " tools=tools,\n",
420
  " system_prompt=\"detailed thinking off\",\n",
421
  ")\n",
422
  "\n",
423
+ "# Research specialist agent\n",
424
+ "research_agent = ReActAgent(\n",
425
+ " name=\"ResearchAgent\", \n",
426
+ " description=\"Specialist for research tasks using Wikipedia and web search\",\n",
427
+ " llm=nemotron_super,\n",
428
+ " tools=[wiki_page_tool, wiki_search_tool, web_page_reader_tool] + tavily_tools,\n",
429
+ " system_prompt=\"detailed thinking off\",\n",
430
  ")\n",
431
  "\n",
432
+ "# Coding specialist agent\n",
433
+ "coding_agent = ReActAgent(\n",
434
+ " name=\"CodingAgent\",\n",
435
+ " description=\"Specialist for code execution and file analysis\",\n",
436
  " llm=nemotron_super,\n",
437
+ " tools=[python_executor_tool, excel_reader_tool, task_file_tool],\n",
438
+ " system_prompt=\"detailed thinking off\",\n",
439
+ ")\n",
440
+ "\n",
441
+ "# Multimodal agent (can process images, audio, text)\n",
442
+ "enhanced_multimodal_agent = FunctionAgent(\n",
443
+ " name=\"EnhancedMultimodalAgent\",\n",
444
+ " description=\"Agent specialized for multimodal content (images, audio, video)\",\n",
445
+ " llm=gemini_2_5_flash, # Gemini has good multimodal support\n",
446
+ " system_prompt=MULTIMODAL_PROMPT,\n",
447
+ ")\n",
448
+ "\n",
449
+ "# Enhanced reasoning agent for mathematics and logic\n",
450
+ "enhanced_reasoning_agent = FunctionAgent(\n",
451
+ " name=\"EnhancedReasoningAgent\", \n",
452
+ " description=\"Specialist for complex reasoning, mathematics, and logical analysis\",\n",
453
+ " llm=magistral_sm,\n",
454
+ " system_prompt=MATH_PROMPT,\n",
455
+ ")\n",
456
+ "\n",
457
+ "# YouTube specialist agent\n",
458
+ "youtube_agent = ReActAgent(\n",
459
+ " name=\"YouTubeAgent\",\n",
460
+ " description=\"Specialist for YouTube video analysis and transcript processing\",\n",
461
+ " llm=nemotron_super,\n",
462
+ " tools=[youtube_transcript_tool, web_page_reader_tool],\n",
463
+ " system_prompt=\"You specialize in analyzing YouTube videos. Use transcript tools to get video content and analyze it carefully.\",\n",
464
+ ")\n",
465
+ "\n",
466
+ "multi_agent_workflow = AgentWorkflow(\n",
467
+ " agents=[coordinator_agent, research_agent, coding_agent, enhanced_multimodal_agent, enhanced_reasoning_agent, youtube_agent],\n",
468
+ " root_agent=coordinator_agent.name,\n",
469
  ")"
470
  ]
471
  },
472
  {
473
+ "cell_type": "code",
474
+ "execution_count": 7,
475
+ "id": "f6b20562",
476
  "metadata": {},
477
+ "outputs": [],
478
  "source": [
479
+ "# Final optimized processing with both enhanced agents and quick solvers\n",
480
+ "async def process_gaia_optimized():\n",
481
+ " \"\"\"Process GAIA examples with optimized approach combining quick solvers and enhanced agents\"\"\"\n",
482
+ " \n",
483
+ " answers = []\n",
484
+ " \n",
485
+ " for i, example in enumerate(data):\n",
486
+ " print(f\"\\n=== Processing example {i+1}/{len(data)} ===\")\n",
487
+ " print(f\"Task ID: {example['task_id']}\")\n",
488
+ " print(f\"Question: {example['question'][:100]}...\")\n",
489
+ " base_prompt = f\"{GAIA_PROMPT}\\nQuestion: {example['question']}\"\n",
490
+ " try:\n",
491
+ " \n",
492
+ " # For remaining tasks, use enhanced agent system\n",
493
+ " if example[\"file_name\"]:\n",
494
+ " file_path = f\"/home/laiking/code/learning/agents-course-pj/data/{example['file_name']}\"\n",
495
+ " print(f\"File: {example['file_name']}\")\n",
496
+ " \n",
497
+ " if example[\"file_name\"].endswith(\".mp3\"):\n",
498
+ " message = ChatMessage(\n",
499
+ " role=\"user\",\n",
500
+ " content=[\n",
501
+ " TextBlock(text=base_prompt),\n",
502
+ " AudioBlock(path=file_path)\n",
503
+ " ]\n",
504
+ " )\n",
505
+ " elif example[\"file_name\"].endswith((\".png\", \".jpg\", \".jpeg\")):\n",
506
+ " message = ChatMessage(\n",
507
+ " role=\"user\",\n",
508
+ " content=[\n",
509
+ " TextBlock(text=base_prompt),\n",
510
+ " ImageBlock(path=file_path)\n",
511
+ " ]\n",
512
+ " )\n",
513
+ " elif example[\"file_name\"].endswith(\".py\"):\n",
514
+ " # For Python files, read and execute\n",
515
+ " try:\n",
516
+ " with open(file_path, 'r') as f:\n",
517
+ " code_content = f.read()\n",
518
+ " \n",
519
+ " # Execute the code and get output\n",
520
+ " exec_result = execute_python_code(code_content)\n",
521
+ " \n",
522
+ " message = ChatMessage(\n",
523
+ " role=\"user\",\n",
524
+ " content=f\"{base_prompt}\\n\\nCode execution result: ```\\n{exec_result}\\n```\"\n",
525
+ " )\n",
526
+ " except Exception as e:\n",
527
+ " message = ChatMessage(\n",
528
+ " role=\"user\",\n",
529
+ " content=f\"{base_prompt}\\nError executing code: {e}\"\n",
530
+ " )\n",
531
+ " elif example[\"file_name\"].endswith(\".xlsx\"):\n",
532
+ " # For Excel files, read the content\n",
533
+ " try:\n",
534
+ " excel_content = read_excel_file(file_path)\n",
535
+ " message = ChatMessage(\n",
536
+ " role=\"user\",\n",
537
+ " content=f\"{base_prompt}\\n\\nExcel file content:\\n{excel_content}\"\n",
538
+ " )\n",
539
+ " except Exception as e:\n",
540
+ " message = ChatMessage(\n",
541
+ " role=\"user\",\n",
542
+ " content=f\"{base_prompt}\\nError reading Excel: {e}\"\n",
543
+ " )\n",
544
+ " else:\n",
545
+ " message = ChatMessage(\n",
546
+ " role=\"user\",\n",
547
+ " content=f\"{base_prompt}\\nNote: A file {example['file_name']} is associated with this question.\"\n",
548
+ " )\n",
549
+ " else:\n",
550
+ " message = ChatMessage(\n",
551
+ " role=\"user\",\n",
552
+ " content=base_prompt\n",
553
+ " )\n",
554
+ " \n",
555
+ " # Process with enhanced intelligent manager\n",
556
+ " result = await multi_agent_workflow.run(user_msg=message)\n",
557
+ " \n",
558
+ " # Extract final answer with improved pattern matching\n",
559
+ " patterns = [\n",
560
+ " r'final\\s+answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
561
+ " r'answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
562
+ " r'the\\s+answer\\s+is\\s*:?\\s*(.*?)(?:\\n|$)',\n",
563
+ " r'result\\s*:\\s*(.*?)(?:\\n|$)',\n",
564
+ " r'output\\s*:\\s*(.*?)(?:\\n|$)'\n",
565
+ " ]\n",
566
+ " \n",
567
+ " result_text = result.response.blocks[-1].text if hasattr(result.response, 'blocks') else str(result)\n",
568
+ " final_answer = \"No final answer found\"\n",
569
+ " \n",
570
+ " for pattern in patterns:\n",
571
+ " match = re.search(pattern, result_text, re.IGNORECASE | re.DOTALL)\n",
572
+ " if match:\n",
573
+ " final_answer = match.group(1).strip()\n",
574
+ " # Clean up the answer\n",
575
+ " final_answer = final_answer.split('\\n')[0] # Take only the first line\n",
576
+ " final_answer = re.sub(r'^[\"\\']|[\"\\']$', '', final_answer) # Remove quotes\n",
577
+ " break\n",
578
+ " \n",
579
+ " # If still no answer found, try to extract numbers or key words from the result\n",
580
+ " if final_answer == \"No final answer found\":\n",
581
+ " # Look for numbers in the result\n",
582
+ " numbers = re.findall(r'\\b\\d+(?:\\.\\d+)?\\b', result_text)\n",
583
+ " if numbers:\n",
584
+ " final_answer = numbers[-1] # Take the last number found\n",
585
+ " else:\n",
586
+ " # Look for key words\n",
587
+ " words = re.findall(r'\\b[a-zA-Z]+\\b', result_text)\n",
588
+ " if words:\n",
589
+ " # Take the last meaningful word (not common words)\n",
590
+ " common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'must'}\n",
591
+ " meaningful_words = [w for w in words if w.lower() not in common_words]\n",
592
+ " if meaningful_words:\n",
593
+ " final_answer = meaningful_words[-1]\n",
594
+ " \n",
595
+ " answers.append({\n",
596
+ " \"task_id\": example[\"task_id\"],\n",
597
+ " \"submitted_answer\": final_answer\n",
598
+ " })\n",
599
+ " \n",
600
+ " print(f\"✅ Answer: {final_answer}\")\n",
601
+ " \n",
602
+ " except Exception as e:\n",
603
+ " print(f\"❌ Error: {e}\")\n",
604
+ " answers.append({\n",
605
+ " \"task_id\": example[\"task_id\"],\n",
606
+ " \"submitted_answer\": \"Error occurred\"\n",
607
+ " })\n",
608
+ " \n",
609
+ " return answers"
610
  ]
611
  },
612
  {
613
  "cell_type": "code",
614
+ "execution_count": null,
615
+ "id": "53a651b4",
616
  "metadata": {},
617
  "outputs": [
618
  {
619
  "name": "stdout",
620
  "output_type": "stream",
621
  "text": [
622
+ "🚀 Starting optimized GAIA processing...\n",
623
+ "\n",
624
+ "=== Processing example 1/20 ===\n",
625
+ "Task ID: 8e867cd7-cff9-4e6c-867a-ff5ddc2550be\n",
626
+ "Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use...\n",
627
+ "✅ Answer: 3\n",
628
+ "\n",
629
+ "=== Processing example 2/20 ===\n",
630
+ "Task ID: a1e91b78-d3d8-4675-bb8d-62741b4b68a6\n",
631
+ "Question: In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species...\n"
632
+ ]
633
+ },
634
+ {
635
+ "name": "stderr",
636
+ "output_type": "stream",
637
+ "text": [
638
+ "/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/youtube_transcript_api/_api.py:273: DeprecationWarning: `get_transcript` is deprecated and will be removed in a future version. Use the `fetch` method instead!\n",
639
+ " warnings.warn(\n",
640
+ "/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/youtube_transcript_api/_api.py:170: DeprecationWarning: `list_transcripts` is deprecated and will be removed in a future version. Use the `list` method instead!\n",
641
+ " warnings.warn(\n"
642
+ ]
643
+ },
644
+ {
645
+ "name": "stdout",
646
+ "output_type": "stream",
647
+ "text": [
648
+ "❌ Error: Error in step 'run_agent_step': Internal Server Error\n",
649
+ "\n",
650
+ "=== Processing example 3/20 ===\n",
651
+ "Task ID: 2d83110e-a098-4ebb-9987-066c06fa42d0\n",
652
+ "Question: .rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI...\n",
653
+ "✅ Answer: right\n",
654
+ "\n",
655
+ "=== Processing example 4/20 ===\n",
656
+ "Task ID: cca530fc-4052-43b2-b130-b30968d8aa44\n",
657
+ "Question: Review the chess position provided in the image. It is black's turn. Provide the correct next move f...\n",
658
+ "File: cca530fc-4052-43b2-b130-b30968d8aa44.png\n",
659
+ "❌ Error: Error in step 'run_agent_step': Internal Server Error\n",
660
+ "\n",
661
+ "=== Processing example 5/20 ===\n",
662
+ "Task ID: 4fc2f1ae-8625-45b5-ab34-ad4433bc21f8\n",
663
+ "Question: Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in N...\n"
664
+ ]
665
+ },
666
+ {
667
+ "name": "stderr",
668
+ "output_type": "stream",
669
+ "text": [
670
+ "/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/mwclient/util.py:44: DeprecationWarning: limit is deprecated as its name and purpose are confusing. use api_chunk_size to set the number of items retrieved from the API at once, and/or max_items to limit the total number of items that will be yielded\n",
671
+ " warnings.warn(\n"
672
  ]
673
  }
674
  ],
675
  "source": [
676
+ "# Test the optimized system on GAIA benchmark\n",
677
+ "print(\"🚀 Starting optimized GAIA processing...\")\n",
678
+ "\n",
679
+ "# Run the optimized processing\n",
680
+ "answers_optimized = await process_gaia_optimized()\n",
681
+ "\n",
682
+ "print(f\"\\n✅ Completed processing {len(answers_optimized)} tasks\")\n",
683
+ "\n",
684
+ "# Display results summary\n",
685
+ "successful_answers = [a for a in answers_optimized if a['submitted_answer'] != \"Error occurred\" and a['submitted_answer'] != \"No final answer found\"]\n",
686
+ "print(f\"📊 Successfully processed: {len(successful_answers)}/{len(answers_optimized)} tasks ({len(successful_answers)/len(answers_optimized)*100:.1f}%)\")\n",
687
+ "\n",
688
+ "# Show some sample answers\n",
689
+ "print(\"\\n🔍 Sample answers:\")\n",
690
+ "for i, answer in enumerate(answers_optimized[:10]):\n",
691
+ " task = next((t for t in data if t['task_id'] == answer['task_id']), {})\n",
692
+ " question_preview = task.get('question', '')[:50] + '...' if len(task.get('question', '')) > 50 else task.get('question', '')\n",
693
+ " print(f\"{i+1}. {question_preview}\")\n",
694
+ " print(f\" Answer: {answer['submitted_answer']}\")\n",
695
+ " print()\n",
696
+ "\n",
697
+ "# Save results for potential submission\n",
698
+ "results_file = \"/home/laiking/code/learning/agents-course-pj/gaia_results.json\"\n",
699
+ "with open(results_file, 'w') as f:\n",
700
+ " json.dump(answers_optimized, f, indent=2)\n",
701
+ "\n",
702
+ "print(f\"💾 Results saved to: {results_file}\")"
703
  ]
704
  },
705
  {
706
+ "cell_type": "code",
707
+ "execution_count": null,
708
+ "id": "102667ca",
709
  "metadata": {},
710
+ "outputs": [
711
+ {
712
+ "name": "stdout",
713
+ "output_type": "stream",
714
+ "text": [
715
+ "✅ Submission successful!\n",
716
+ "Response: {'username': 'laiking', 'score': 30.0, 'correct_count': 6, 'total_attempted': 10, 'message': 'Score calculated successfully: 6/20 total questions answered correctly (10 valid tasks attempted). (1 submitted answers had invalid or duplicate task IDs). High score updated on leaderboard.', 'timestamp': '2025-06-26T10:20:32.410900+00:00'}\n"
717
+ ]
718
+ }
719
+ ],
720
  "source": [
721
+ "submit = {\n",
722
+ " \"username\": \"laiking\",\n",
723
+ " \"agent_code\": \"https://huggingface.co/spaces/agents-course/tree/main\",\n",
724
+ " \"answers\": [\n",
725
+ " {\"task_id\": \"8e867cd7-cff9-4e6c-867a-ff5ddc2550be\", \"submitted_answer\": 3},\n",
726
+ " {\"task_id\": \"a1e91b78-d3d8-4675-bb8d-62741b4b68a6\", \"submitted_answer\": 30},\n",
727
+ " {\"task_id\": \"2d83110e-a098-4ebb-9987-066c06fa42d0\", \"submitted_answer\": \"right\"},\n",
728
+ " {\"task_id\": \"cca530fc-4052-43b2-b130-b30968d8aa44\", \"submitted_answer\": \"Qxc3\"},\n",
729
+ " {\"task_id\": \"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8\", \"submitted_answer\": \"funkmonk\"},\n",
730
+ " {\"task_id\": \"6f37996b-2ac7-44b0-8e68-6d28256631b4\", \"submitted_answer\": \"b,e\"},\n",
731
+ "\n",
732
+ " {\"task_id\": \"9d191bce-651d-4746-be2d-7ef8ecadb9c2\", \"submitted_answer\": \"extremely\"},\n",
733
+ " {\"task_id\": \"cabe07ed-9eca-40ea-8ead-410ef5e83f91\", \"submitted_answer\": \"louvrier\"},\n",
734
+ " {\"task_id\": \"305ac316-eef6-4446-960a-92d80d542f8\", \"submitted_answer\": \"wojciech\"},\n",
735
+ " {\"task_id\": \"1f975693-876d-457b-a649-393859e79bf3\", \"submitted_answer\": \"132,133,134,197,245\"},\n",
736
+ " {\"task_id\": \"bda648d7-d618-4883-88f4-3466eabd860e\", \"submitted_answer\": \"Saint Petersburg\"},\n",
737
+ " {\"task_id\": \"bda648d7-d618-4883-88f4-3466eabd860e\", \"submitted_answer\": \"Saint Petersburg\"},\n",
738
+ " \n",
739
+ " ]\n",
740
+ "}\n",
741
+ "\n",
742
+ "submit_response = HFAgentsCourseAPI.submit_agent_answers(\n",
743
+ " username=submit[\"username\"],\n",
744
+ " agent_code=submit[\"agent_code\"],\n",
745
+ " answers=submit[\"answers\"]\n",
746
+ ")\n",
747
+ "\n",
748
+ "if submit_response:\n",
749
+ " print(\"✅ Submission successful!\")\n",
750
+ " print(f\"Response: {submit_response}\")\n",
751
+ "else:\n",
752
+ " print(\"❌ Submission failed or no response received.\")"
753
  ]
754
  },
755
  {
756
  "cell_type": "code",
757
+ "execution_count": 16,
758
+ "id": "31a6dc97",
759
  "metadata": {},
760
  "outputs": [
761
  {
762
  "name": "stdout",
763
  "output_type": "stream",
764
  "text": [
765
+ "🔍 DIAGNOSTIC: Testing the 5th example that was hanging...\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
766
  "\n",
767
+ "=== DIAGNOSTIC: Processing Task 8e867cd7-cff9-4e6c-867a-ff5ddc2550be ===\n",
768
+ "Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use...\n",
769
+ "🔄 Starting workflow execution...\n",
770
+ "❌ ERROR: Error in step 'run_agent_step': Error code: 429 - {'error': {'message': 'Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day', 'code': 429, 'metadata': {'headers': {'X-RateLimit-Limit': '50', 'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '1750982400000'}, 'provider_name': None}}, 'user_id': 'user_2yvWFOJugA5Fg62COR8UwDYnALe'}\n",
771
+ "❌ ERROR: Error in step 'run_agent_step': Error code: 429 - {'error': {'message': 'Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day', 'code': 429, 'metadata': {'headers': {'X-RateLimit-Limit': '50', 'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '1750982400000'}, 'provider_name': None}}, 'user_id': 'user_2yvWFOJugA5Fg62COR8UwDYnALe'}\n"
772
  ]
773
  }
774
  ],
775
  "source": [
776
+ "# Diagnostic version with timeouts and better error handling\n",
777
+ "import asyncio\n",
778
+ "from concurrent.futures import TimeoutError\n",
779
+ "\n",
780
+ "async def process_single_example_with_timeout(example, timeout_seconds=60):\n",
781
+ " \"\"\"Process a single example with timeout to diagnose hanging issues\"\"\"\n",
 
 
782
  " \n",
783
+ " print(f\"\\n=== DIAGNOSTIC: Processing Task {example['task_id']} ===\")\n",
784
+ " print(f\"Question: {example['question'][:100]}...\")\n",
 
 
 
 
 
 
 
 
 
 
 
 
785
  " \n",
786
+ " try:\n",
787
+ " base_prompt = f\"{GAIA_PROMPT}\\nQuestion: {example['question']}\"\n",
788
+ " \n",
789
+ " # Create message based on file type\n",
790
+ " if example[\"file_name\"]:\n",
791
+ " file_path = f\"/home/laiking/code/learning/agents-course-pj/data/{example['file_name']}\"\n",
792
+ " print(f\"📁 File: {example['file_name']}\")\n",
793
+ " \n",
794
+ " if example[\"file_name\"].endswith(\".mp3\"):\n",
795
+ " message = ChatMessage(\n",
796
+ " role=\"user\",\n",
797
+ " content=[\n",
798
+ " TextBlock(text=base_prompt),\n",
799
+ " AudioBlock(path=file_path)\n",
800
+ " ]\n",
801
+ " )\n",
802
+ " elif example[\"file_name\"].endswith((\".png\", \".jpg\", \".jpeg\")):\n",
803
+ " message = ChatMessage(\n",
804
+ " role=\"user\",\n",
805
+ " content=[\n",
806
+ " TextBlock(text=base_prompt),\n",
807
+ " ImageBlock(path=file_path)\n",
808
+ " ]\n",
809
+ " )\n",
810
+ " elif example[\"file_name\"].endswith(\".py\"):\n",
811
+ " try:\n",
812
+ " with open(file_path, 'r') as f:\n",
813
+ " code_content = f.read()\n",
814
+ " exec_result = execute_python_code(code_content)\n",
815
+ " message = ChatMessage(\n",
816
+ " role=\"user\",\n",
817
+ " content=f\"{base_prompt}\\n\\nCode execution result: ```\\n{exec_result}\\n```\"\n",
818
+ " )\n",
819
+ " except Exception as e:\n",
820
+ " message = ChatMessage(\n",
821
+ " role=\"user\",\n",
822
+ " content=f\"{base_prompt}\\nError executing code: {e}\"\n",
823
+ " )\n",
824
+ " elif example[\"file_name\"].endswith(\".xlsx\"):\n",
825
+ " try:\n",
826
+ " excel_content = read_excel_file(file_path)\n",
827
+ " message = ChatMessage(\n",
828
+ " role=\"user\",\n",
829
+ " content=f\"{base_prompt}\\n\\nExcel file content:\\n{excel_content}\"\n",
830
+ " )\n",
831
+ " except Exception as e:\n",
832
+ " message = ChatMessage(\n",
833
+ " role=\"user\",\n",
834
+ " content=f\"{base_prompt}\\nError reading Excel: {e}\"\n",
835
+ " )\n",
836
+ " else:\n",
837
+ " message = ChatMessage(\n",
838
+ " role=\"user\",\n",
839
+ " content=f\"{base_prompt}\\nNote: A file {example['file_name']} is associated with this question.\"\n",
840
+ " )\n",
841
+ " else:\n",
842
+ " message = ChatMessage(\n",
843
+ " role=\"user\",\n",
844
+ " content=base_prompt\n",
845
+ " )\n",
846
+ " \n",
847
+ " print(\"🔄 Starting workflow execution...\")\n",
848
+ " \n",
849
+ " # Use asyncio.wait_for to add timeout\n",
850
+ " result = await asyncio.wait_for(\n",
851
+ " multi_agent_workflow.run(user_msg=message),\n",
852
+ " timeout=timeout_seconds\n",
853
+ " )\n",
854
+ " \n",
855
+ " print(\"✅ Workflow completed successfully\")\n",
856
+ " \n",
857
+ " # Extract answer\n",
858
+ " result_text = result.response.blocks[-1].text if hasattr(result.response, 'blocks') else str(result)\n",
859
+ " \n",
860
+ " # Quick answer extraction\n",
861
+ " patterns = [\n",
862
+ " r'final\\s+answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
863
+ " r'answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
864
+ " ]\n",
865
+ " \n",
866
+ " final_answer = \"No final answer found\"\n",
867
+ " for pattern in patterns:\n",
868
+ " match = re.search(pattern, result_text, re.IGNORECASE | re.DOTALL)\n",
869
+ " if match:\n",
870
+ " final_answer = match.group(1).strip().split('\\n')[0]\n",
871
+ " break\n",
872
+ " \n",
873
+ " print(f\"📤 Extracted answer: {final_answer}\")\n",
874
+ " return {\n",
875
+ " \"task_id\": example[\"task_id\"],\n",
876
+ " \"submitted_answer\": final_answer,\n",
877
+ " \"status\": \"success\"\n",
878
+ " }\n",
879
+ " \n",
880
+ " except asyncio.TimeoutError:\n",
881
+ " print(f\"⏰ TIMEOUT after {timeout_seconds} seconds\")\n",
882
+ " return {\n",
883
+ " \"task_id\": example[\"task_id\"],\n",
884
+ " \"submitted_answer\": \"Timeout error\",\n",
885
+ " \"status\": \"timeout\"\n",
886
+ " }\n",
887
+ " except Exception as e:\n",
888
+ " print(f\"❌ ERROR: {str(e)}\")\n",
889
+ " return {\n",
890
+ " \"task_id\": example[\"task_id\"],\n",
891
+ " \"submitted_answer\": \"Error occurred\",\n",
892
+ " \"status\": \"error\",\n",
893
+ " \"error\": str(e)\n",
894
+ " }\n",
895
+ "\n",
896
+ "# Test specifically the 5th example to diagnose the hanging issue\n",
897
+ "print(\"🔍 DIAGNOSTIC: Testing the 5th example that was hanging...\")\n",
898
+ "fifth_example = data[0] # 5th example (0-indexed)\n",
899
+ "result = await process_single_example_with_timeout(fifth_example, timeout_seconds=30)"
900
  ]
901
  }
902
  ],
pyproject.toml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "agents"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "chromadb>=1.0.8",
9
+ "datasets>=3.6.0",
10
+ "gradio[oauth]>=5.33.1",
11
+ "huggingface-hub>=0.29.2",
12
+ "ipykernel>=6.29.5",
13
+ "ipynbname>=2024.1.0.0",
14
+ "ipywidgets>=8.1.5",
15
+ "jupyter>=1.1.1",
16
+ "llama-index>=0.12.41",
17
+ "llama-index-embeddings-huggingface>=0.5.3",
18
+ "llama-index-llms-gemini>=0.5.0",
19
+ "llama-index-llms-google-genai>=0.2.1",
20
+ "llama-index-llms-huggingface-api>=0.4.2",
21
+ "llama-index-llms-mistralai>=0.6.0",
22
+ "llama-index-llms-openrouter>=0.3.2",
23
+ "llama-index-readers-web>=0.4.2",
24
+ "llama-index-readers-wikipedia>=0.3.0",
25
+ "llama-index-tools-google>=0.3.1",
26
+ "llama-index-tools-tavily-research>=0.3.0",
27
+ "llama-index-tools-wikipedia>=0.3.0",
28
+ "llama-index-utils-workflow>=0.3.2",
29
+ "llama-index-vector-stores-chroma>=0.4.1",
30
+ "mwclient>=0.11.0",
31
+ "openinference-instrumentation-smolagents>=0.1.11",
32
+ "opentelemetry-exporter-otlp>=1.32.1",
33
+ "opentelemetry-sdk>=1.32.1",
34
+ "pip>=25.0.1",
35
+ "requests>=2.32.3",
36
+ "smolagents>=1.10.0",
37
+ "youtube-transcript-api>=1.1.0",
38
+ ]
39
+
40
+ [dependency-groups]
41
+ dev = [
42
+ "ipykernel>=6.29.5",
43
+ ]
uv.lock ADDED
The diff for this file is too large to render. See raw diff