Spaces:
Sleeping
Sleeping
update
Browse files- app.py +50 -13
- notebooks/llamaindex.ipynb +658 -127
- pyproject.toml +43 -0
- uv.lock +0 -0
app.py
CHANGED
|
@@ -8,33 +8,33 @@ import json
|
|
| 8 |
import warnings
|
| 9 |
import mwclient
|
| 10 |
from llama_index.core.tools import FunctionTool
|
| 11 |
-
# from llama_index.llms.mistralai import MistralAI
|
| 12 |
-
# from llama_index.llms.google_genai import GoogleGenAI
|
| 13 |
from llama_index.llms.openrouter import OpenRouter
|
| 14 |
from llama_index.core.agent.workflow import ReActAgent
|
| 15 |
from llama_index.readers.web import BeautifulSoupWebReader
|
| 16 |
from llama_index.tools.tavily_research import TavilyToolSpec
|
| 17 |
-
from llama_index.core.llms import ChatMessage
|
| 18 |
from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
|
| 19 |
from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
# Disable pydantic deprecation warnings
|
| 21 |
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)
|
| 22 |
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince211)
|
|
|
|
| 23 |
# (Keep Constants as is)
|
| 24 |
# --- Constants ---
|
| 25 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 26 |
|
| 27 |
# --- Basic Agent Definition ---
|
| 28 |
-
# ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
|
| 29 |
|
| 30 |
-
# Load LLM API (Google GenAI)
|
| 31 |
-
# gemini_2_5_flash = GoogleGenAI(model="gemini-2.5-flash-preview-05-20") # Audio, images, vidéos et texte -> Texte
|
| 32 |
-
# magistral_sm = MistralAI(model="magistral-small-2506") # reasoning
|
| 33 |
-
# mistral_sm = MistralAI(model="mistral-small-latest") # general purpose + image understanding capabilities
|
| 34 |
nemotron_super = OpenRouter(model="nvidia/llama-3.3-nemotron-super-49b-v1:free") # advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks
|
| 35 |
|
| 36 |
-
|
| 37 |
-
# Tools
|
| 38 |
|
| 39 |
def get_page(page_query:str):
|
| 40 |
"""Send a query to wikipedia and return the text of the page found if it is found, else return an empty string."""
|
|
@@ -83,7 +83,38 @@ tools = [
|
|
| 83 |
|
| 84 |
GAIA_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
| 85 |
|
| 86 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
"""
|
| 88 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 89 |
and displays the results.
|
|
@@ -109,6 +140,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 109 |
llm=nemotron_super,
|
| 110 |
tools=tools,
|
| 111 |
system_prompt="detailed thinking off",
|
|
|
|
|
|
|
| 112 |
)
|
| 113 |
except Exception as e:
|
| 114 |
print(f"Error instantiating agent: {e}")
|
|
@@ -118,7 +151,8 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 118 |
print(agent_code)
|
| 119 |
|
| 120 |
# 2. Fetch Questions
|
| 121 |
-
|
|
|
|
| 122 |
try:
|
| 123 |
questions_data = json.load(f)
|
| 124 |
except json.JSONDecodeError as e:
|
|
@@ -134,13 +168,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
|
|
| 134 |
for item in questions_data:
|
| 135 |
task_id = item.get("task_id")
|
| 136 |
question_text = item.get("question")
|
|
|
|
|
|
|
| 137 |
if not task_id or question_text is None:
|
| 138 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 139 |
continue
|
|
|
|
| 140 |
try:
|
| 141 |
prompt = f"{GAIA_PROMPT}\nQuestion: {question_text}"
|
| 142 |
message = ChatMessage(role="user",content=prompt) # TODO: handle files/multimodal inputs
|
| 143 |
-
agent_answer = agent.run(user_msg=message)
|
| 144 |
# Parsing agents answer
|
| 145 |
pattern = r'(?:final\s+)?answer\s*:\s*(.*)'
|
| 146 |
match = re.search(pattern, agent_answer.response.blocks[-1].text, re.IGNORECASE)
|
|
|
|
| 8 |
import warnings
|
| 9 |
import mwclient
|
| 10 |
from llama_index.core.tools import FunctionTool
|
|
|
|
|
|
|
| 11 |
from llama_index.llms.openrouter import OpenRouter
|
| 12 |
from llama_index.core.agent.workflow import ReActAgent
|
| 13 |
from llama_index.readers.web import BeautifulSoupWebReader
|
| 14 |
from llama_index.tools.tavily_research import TavilyToolSpec
|
| 15 |
+
from llama_index.core.llms import ChatMessage
|
| 16 |
from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
|
| 17 |
from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211
|
| 18 |
+
|
| 19 |
+
# Get environment variables for local testing
|
| 20 |
+
file_path = os.path.dirname(os.path.abspath(__file__))
|
| 21 |
+
environment_file = os.path.join(file_path, ".env")
|
| 22 |
+
if os.path.exists(environment_file): # Load environment variables from .env file
|
| 23 |
+
from dotenv import load_dotenv
|
| 24 |
+
load_dotenv(environment_file)
|
| 25 |
# Disable pydantic deprecation warnings
|
| 26 |
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince20)
|
| 27 |
warnings.filterwarnings("ignore", category=PydanticDeprecatedSince211)
|
| 28 |
+
|
| 29 |
# (Keep Constants as is)
|
| 30 |
# --- Constants ---
|
| 31 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 32 |
|
| 33 |
# --- Basic Agent Definition ---
|
|
|
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
nemotron_super = OpenRouter(model="nvidia/llama-3.3-nemotron-super-49b-v1:free") # advanced reasoning, conversational interactions, retrieval-augmented generation (RAG), and tool-calling tasks
|
| 36 |
|
| 37 |
+
# --- Tools ---
|
|
|
|
| 38 |
|
| 39 |
def get_page(page_query:str):
|
| 40 |
"""Send a query to wikipedia and return the text of the page found if it is found, else return an empty string."""
|
|
|
|
| 83 |
|
| 84 |
GAIA_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
|
| 85 |
|
| 86 |
+
def extract_final_answer(response_text: str) -> str:
    """Extract the final answer from an agent's free-form response text.

    The agent is prompted to finish with ``FINAL ANSWER: <answer>``, but its
    reasoning may contain earlier "answer:" mentions. The most specific
    marker is therefore tried first, and for each marker the LAST occurrence
    in the text is preferred over earlier ones.

    Args:
        response_text: Raw text produced by the agent (may be empty/None).

    Returns:
        The cleaned answer (whitespace normalized, code fences and
        surrounding markdown emphasis removed). If no marker yields a usable
        answer, the last line of the response is returned when it is
        non-empty and shorter than 200 characters. Otherwise one of the
        sentinel strings ``"ERROR: Empty response"`` (falsy input) or
        ``"No clear final answer found"`` is returned.
    """
    if not response_text:
        return "ERROR: Empty response"

    # Ordered from most to least specific so that an explicit
    # "FINAL ANSWER:" always wins over a bare "answer:" or "final:".
    patterns = (
        r'final\s+answer\s*:\s*(.*?)(?:\n|$)',
        r'answer\s*:\s*(.*?)(?:\n|$)',
        r'final\s*:\s*(.*?)(?:\n|$)',
    )
    flags = re.IGNORECASE | re.DOTALL

    for pattern in patterns:
        # Walk the matches backwards: the template line sits at the end of
        # the response, while earlier hits usually belong to the reasoning.
        for match in reversed(list(re.finditer(pattern, response_text, flags))):
            answer = re.sub(r'\s+', ' ', match.group(1).strip())  # Normalize whitespace
            answer = answer.replace('```', '').strip()  # Remove code fences
            # Drop markdown emphasis/backticks left over from e.g.
            # "**FINAL ANSWER:** 42" or "FINAL ANSWER: `42`".
            answer = answer.strip('*`').strip()
            if answer and len(answer) < 500:  # Reasonable length check
                return answer

    # Fallback: use the last line when no marker produced a usable answer.
    # str.split always returns at least one element, so indexing is safe.
    last_line = response_text.strip().split('\n')[-1].strip()
    if last_line and len(last_line) < 200:
        return last_line

    return "No clear final answer found"
|
| 116 |
+
|
| 117 |
+
async def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 118 |
"""
|
| 119 |
Fetches all questions, runs the BasicAgent on them, submits all answers,
|
| 120 |
and displays the results.
|
|
|
|
| 140 |
llm=nemotron_super,
|
| 141 |
tools=tools,
|
| 142 |
system_prompt="detailed thinking off",
|
| 143 |
+
max_iterations=10,
|
| 144 |
+
verbose=True,
|
| 145 |
)
|
| 146 |
except Exception as e:
|
| 147 |
print(f"Error instantiating agent: {e}")
|
|
|
|
| 151 |
print(agent_code)
|
| 152 |
|
| 153 |
# 2. Fetch Questions
|
| 154 |
+
data_path = os.path.join(file_path, "data", "gaia-tasks.json")
|
| 155 |
+
with open(data_path,"r") as f:
|
| 156 |
try:
|
| 157 |
questions_data = json.load(f)
|
| 158 |
except json.JSONDecodeError as e:
|
|
|
|
| 168 |
for item in questions_data:
|
| 169 |
task_id = item.get("task_id")
|
| 170 |
question_text = item.get("question")
|
| 171 |
+
file_name = item.get("file_name", "")
|
| 172 |
+
|
| 173 |
if not task_id or question_text is None:
|
| 174 |
print(f"Skipping item with missing task_id or question: {item}")
|
| 175 |
continue
|
| 176 |
+
|
| 177 |
try:
|
| 178 |
prompt = f"{GAIA_PROMPT}\nQuestion: {question_text}"
|
| 179 |
message = ChatMessage(role="user",content=prompt) # TODO: handle files/multimodal inputs
|
| 180 |
+
agent_answer = await agent.run(user_msg=message)
|
| 181 |
# Parsing agents answer
|
| 182 |
pattern = r'(?:final\s+)?answer\s*:\s*(.*)'
|
| 183 |
match = re.search(pattern, agent_answer.response.blocks[-1].text, re.IGNORECASE)
|
notebooks/llamaindex.ipynb
CHANGED
|
@@ -18,7 +18,7 @@
|
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"cell_type": "code",
|
| 21 |
-
"execution_count":
|
| 22 |
"id": "a7101ff8",
|
| 23 |
"metadata": {},
|
| 24 |
"outputs": [
|
|
@@ -26,7 +26,7 @@
|
|
| 26 |
"name": "stderr",
|
| 27 |
"output_type": "stream",
|
| 28 |
"text": [
|
| 29 |
-
"/home/laiking/code/learning/agents/.venv/lib/python3.12/site-packages/pydantic/_internal/_config.py:323: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/\n",
|
| 30 |
" warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)\n"
|
| 31 |
]
|
| 32 |
}
|
|
@@ -38,20 +38,27 @@
|
|
| 38 |
"import warnings\n",
|
| 39 |
"import requests\n",
|
| 40 |
"import mwclient\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
"from dotenv import load_dotenv\n",
|
|
|
|
|
|
|
|
|
|
| 42 |
"\n",
|
| 43 |
"from llama_index.core.tools import FunctionTool\n",
|
| 44 |
"from llama_index.llms.mistralai import MistralAI\n",
|
| 45 |
"from llama_index.llms.openrouter import OpenRouter\n",
|
| 46 |
"from llama_index.llms.google_genai import GoogleGenAI\n",
|
| 47 |
-
"from llama_index.core.agent.workflow import ReActAgent\n",
|
| 48 |
-
"\n",
|
| 49 |
"from llama_index.readers.web import BeautifulSoupWebReader\n",
|
| 50 |
"from llama_index.tools.tavily_research import TavilyToolSpec\n",
|
| 51 |
-
"from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock, AudioBlock\n",
|
| 52 |
"from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool\n",
|
|
|
|
|
|
|
| 53 |
"\n",
|
| 54 |
"from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211\n",
|
|
|
|
| 55 |
"# Load environment variables from .env file\n",
|
| 56 |
"load_dotenv()\n",
|
| 57 |
"# Disable pydantic deprecation warnings\n",
|
|
@@ -61,7 +68,7 @@
|
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"cell_type": "code",
|
| 64 |
-
"execution_count":
|
| 65 |
"id": "a4bfbbc9",
|
| 66 |
"metadata": {},
|
| 67 |
"outputs": [],
|
|
@@ -89,46 +96,45 @@
|
|
| 89 |
" raise Exception(f\"Failed to fetch task file for task_id {task_id}: {response.text}\")\n",
|
| 90 |
" \n",
|
| 91 |
" @staticmethod\n",
|
| 92 |
-
" def
|
| 93 |
-
"
|
| 94 |
-
"
|
| 95 |
-
"
|
| 96 |
-
" }\n",
|
| 97 |
-
"
|
| 98 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
]
|
| 100 |
},
|
| 101 |
{
|
| 102 |
"cell_type": "code",
|
| 103 |
-
"execution_count":
|
| 104 |
"id": "ab28e8d7",
|
| 105 |
"metadata": {},
|
| 106 |
"outputs": [],
|
| 107 |
"source": [
|
| 108 |
"# data loading and downloading (if not)\n",
|
| 109 |
-
"
|
| 110 |
-
"gaia_filename = \"gaia-val-20.json\"\n",
|
| 111 |
-
"\n",
|
| 112 |
"# download questions\n",
|
| 113 |
-
"if not os.path.exists(
|
| 114 |
" data = HFAgentsCourseAPI.get_all_questions()\n",
|
| 115 |
-
" with open(
|
| 116 |
" json.dump(data, f, indent=4)\n",
|
| 117 |
"else:\n",
|
| 118 |
-
" with open(
|
| 119 |
-
" data = json.load(f)
|
| 120 |
-
"\n",
|
| 121 |
-
"# download task files\n",
|
| 122 |
-
"for el in data:\n",
|
| 123 |
-
" if el[\"file_name\"] and not os.path.exists(data_dir + el[\"file_name\"]):\n",
|
| 124 |
-
" file = HFAgentsCourseAPI.get_task_file(el[\"task_id\"])\n",
|
| 125 |
-
" with open(data_dir + el[\"file_name\"], \"wb\") as f:\n",
|
| 126 |
-
" f.write(file) "
|
| 127 |
]
|
| 128 |
},
|
| 129 |
{
|
| 130 |
"cell_type": "code",
|
| 131 |
-
"execution_count":
|
| 132 |
"id": "94f848b8",
|
| 133 |
"metadata": {},
|
| 134 |
"outputs": [],
|
|
@@ -165,12 +171,12 @@
|
|
| 165 |
},
|
| 166 |
{
|
| 167 |
"cell_type": "code",
|
| 168 |
-
"execution_count":
|
| 169 |
"id": "2baeb38b",
|
| 170 |
"metadata": {},
|
| 171 |
"outputs": [],
|
| 172 |
"source": [
|
| 173 |
-
"#
|
| 174 |
"\n",
|
| 175 |
"def get_page(page_query:str):\n",
|
| 176 |
" \"\"\"Send a query to wikipedia and return the text of the page found if it is found, else return an empty string.\"\"\"\n",
|
|
@@ -184,10 +190,98 @@
|
|
| 184 |
" \"\"\"Reverse a string.\"\"\"\n",
|
| 185 |
" return s[::-1]\n",
|
| 186 |
"\n",
|
| 187 |
-
"
|
| 188 |
-
" \"\"\"
|
| 189 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
"\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
"wiki_page_tool = FunctionTool.from_defaults(\n",
|
| 192 |
" get_page,\n",
|
| 193 |
" name=\"WikipediaTool\",\n",
|
|
@@ -198,7 +292,36 @@
|
|
| 198 |
" reverse_string,\n",
|
| 199 |
" name=\"ReverseStringTool\",\n",
|
| 200 |
" description=\"Reverse a string and return it.\",\n",
|
| 201 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 202 |
")\n",
|
| 203 |
"\n",
|
| 204 |
"tavily_tools = TavilyToolSpec(\n",
|
|
@@ -211,161 +334,569 @@
|
|
| 211 |
" description=\"A tool for reading web pages. Provide a URL to read the content of the page.\",\n",
|
| 212 |
")\n",
|
| 213 |
"\n",
|
|
|
|
| 214 |
"tools = [\n",
|
| 215 |
" wiki_page_tool,\n",
|
|
|
|
| 216 |
" reverse_string_tool,\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
" web_page_reader_tool,\n",
|
| 218 |
"] + tavily_tools"
|
| 219 |
]
|
| 220 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
{
|
| 222 |
"cell_type": "code",
|
| 223 |
-
"execution_count":
|
| 224 |
-
"id": "
|
| 225 |
"metadata": {},
|
| 226 |
"outputs": [],
|
| 227 |
"source": [
|
| 228 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
"\n",
|
| 230 |
-
"
|
| 231 |
-
"
|
| 232 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
" llm=nemotron_super,\n",
|
| 234 |
" tools=tools,\n",
|
| 235 |
" system_prompt=\"detailed thinking off\",\n",
|
| 236 |
")\n",
|
| 237 |
"\n",
|
| 238 |
-
"
|
| 239 |
-
"
|
| 240 |
-
"
|
| 241 |
-
"
|
| 242 |
-
"
|
|
|
|
|
|
|
| 243 |
")\n",
|
| 244 |
"\n",
|
| 245 |
-
"
|
| 246 |
-
"
|
| 247 |
-
"
|
|
|
|
| 248 |
" llm=nemotron_super,\n",
|
| 249 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
")"
|
| 251 |
]
|
| 252 |
},
|
| 253 |
{
|
| 254 |
-
"cell_type": "
|
| 255 |
-
"
|
|
|
|
| 256 |
"metadata": {},
|
|
|
|
| 257 |
"source": [
|
| 258 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
]
|
| 260 |
},
|
| 261 |
{
|
| 262 |
"cell_type": "code",
|
| 263 |
-
"execution_count":
|
| 264 |
-
"id": "
|
| 265 |
"metadata": {},
|
| 266 |
"outputs": [
|
| 267 |
{
|
| 268 |
"name": "stdout",
|
| 269 |
"output_type": "stream",
|
| 270 |
"text": [
|
| 271 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
]
|
| 273 |
}
|
| 274 |
],
|
| 275 |
"source": [
|
| 276 |
-
"
|
| 277 |
-
"
|
| 278 |
-
"
|
| 279 |
-
"
|
| 280 |
-
"
|
| 281 |
-
"
|
| 282 |
-
"
|
| 283 |
-
"
|
| 284 |
-
"
|
| 285 |
-
"
|
| 286 |
-
"
|
| 287 |
-
"
|
| 288 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
]
|
| 290 |
},
|
| 291 |
{
|
| 292 |
-
"cell_type": "
|
| 293 |
-
"
|
|
|
|
| 294 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
"source": [
|
| 296 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
]
|
| 298 |
},
|
| 299 |
{
|
| 300 |
"cell_type": "code",
|
| 301 |
-
"execution_count":
|
| 302 |
-
"id": "
|
| 303 |
"metadata": {},
|
| 304 |
"outputs": [
|
| 305 |
{
|
| 306 |
"name": "stdout",
|
| 307 |
"output_type": "stream",
|
| 308 |
"text": [
|
| 309 |
-
"
|
| 310 |
-
"your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].\n",
|
| 311 |
-
"YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of\n",
|
| 312 |
-
"numbers and/or strings.\n",
|
| 313 |
-
"If you are asked for a number, don’t use comma to write your number neither use units such as $ or percent\n",
|
| 314 |
-
"sign unless specified otherwise.\n",
|
| 315 |
-
"If you are asked for a string, don’t use articles, neither abbreviations (e.g. for cities), and write the digits in\n",
|
| 316 |
-
"plain text unless specified otherwise.\n",
|
| 317 |
-
"If you are asked for a comma separated list, apply the above rules depending of whether the element to be put\n",
|
| 318 |
-
"in the list is a number or a string. \n",
|
| 319 |
-
"Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.\n",
|
| 320 |
-
"Agent response: Thought: After reviewing the provided Wikipedia page for Mercedes Sosa, I've located the discography section which lists her studio albums. To determine how many were published between 2000 and 2009 (inclusive), I'll examine the \"Studio albums\" table for release years within that range.\n",
|
| 321 |
-
"\n",
|
| 322 |
-
"Action: None (Information found within provided observation)\n",
|
| 323 |
-
"\n",
|
| 324 |
-
"Observation Analysis:\n",
|
| 325 |
-
"- **2005**: Corazón Libre\n",
|
| 326 |
-
"- **2009**: Cantora 1, Cantora 2\n",
|
| 327 |
"\n",
|
| 328 |
-
"
|
| 329 |
-
"
|
| 330 |
-
"
|
|
|
|
|
|
|
| 331 |
]
|
| 332 |
}
|
| 333 |
],
|
| 334 |
"source": [
|
| 335 |
-
"
|
| 336 |
-
"
|
| 337 |
-
"
|
| 338 |
-
"
|
| 339 |
-
"
|
| 340 |
-
"
|
| 341 |
-
" blocks=TextBlock(text=f\"{GAIA_PROMPT}\\nQuestion: {example['question']}\")\n",
|
| 342 |
-
" )\n",
|
| 343 |
" \n",
|
| 344 |
-
"
|
| 345 |
-
"
|
| 346 |
-
" file_path = os.path.join(DATA_DIR, example[\"file_name\"])\n",
|
| 347 |
-
" if example[\"file_name\"].endswith(\".mp3\"):\n",
|
| 348 |
-
" file_block = AudioBlock(path=file_path, format=\"mp3\")\n",
|
| 349 |
-
" elif example[\"file_name\"].endswith(\".py\"):\n",
|
| 350 |
-
" print(\"Python file found, skipping...\")\n",
|
| 351 |
-
" continue\n",
|
| 352 |
-
" elif example[\"file_name\"].endswith(\".png\"):\n",
|
| 353 |
-
" print(\"Image file found, skipping...\")\n",
|
| 354 |
-
" continue\n",
|
| 355 |
-
" elif example[\"file_name\"].endswith(\".xlsx\"):\n",
|
| 356 |
-
" print(\"Excel file found, skipping...\")\n",
|
| 357 |
-
" continue\n",
|
| 358 |
" \n",
|
| 359 |
-
"
|
| 360 |
-
"
|
| 361 |
-
"
|
| 362 |
-
"
|
| 363 |
-
"
|
| 364 |
-
"
|
| 365 |
-
"
|
| 366 |
-
"
|
| 367 |
-
"
|
| 368 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
]
|
| 370 |
}
|
| 371 |
],
|
|
|
|
| 18 |
},
|
| 19 |
{
|
| 20 |
"cell_type": "code",
|
| 21 |
+
"execution_count": 3,
|
| 22 |
"id": "a7101ff8",
|
| 23 |
"metadata": {},
|
| 24 |
"outputs": [
|
|
|
|
| 26 |
"name": "stderr",
|
| 27 |
"output_type": "stream",
|
| 28 |
"text": [
|
| 29 |
+
"/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/pydantic/_internal/_config.py:323: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.11/migration/\n",
|
| 30 |
" warnings.warn(DEPRECATION_MESSAGE, DeprecationWarning)\n"
|
| 31 |
]
|
| 32 |
}
|
|
|
|
| 38 |
"import warnings\n",
|
| 39 |
"import requests\n",
|
| 40 |
"import mwclient\n",
|
| 41 |
+
"import asyncio\n",
|
| 42 |
+
"import pandas as pd\n",
|
| 43 |
+
"import subprocess\n",
|
| 44 |
+
"import tempfile\n",
|
| 45 |
"from dotenv import load_dotenv\n",
|
| 46 |
+
"from typing import List, Dict, Optional\n",
|
| 47 |
+
"from urllib.parse import urlparse, parse_qs\n",
|
| 48 |
+
"from youtube_transcript_api import YouTubeTranscriptApi\n",
|
| 49 |
"\n",
|
| 50 |
"from llama_index.core.tools import FunctionTool\n",
|
| 51 |
"from llama_index.llms.mistralai import MistralAI\n",
|
| 52 |
"from llama_index.llms.openrouter import OpenRouter\n",
|
| 53 |
"from llama_index.llms.google_genai import GoogleGenAI\n",
|
|
|
|
|
|
|
| 54 |
"from llama_index.readers.web import BeautifulSoupWebReader\n",
|
| 55 |
"from llama_index.tools.tavily_research import TavilyToolSpec\n",
|
|
|
|
| 56 |
"from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool\n",
|
| 57 |
+
"from llama_index.core.llms import ChatMessage, TextBlock, ImageBlock, AudioBlock\n",
|
| 58 |
+
"from llama_index.core.agent.workflow import ReActAgent,FunctionAgent, AgentWorkflow\n",
|
| 59 |
"\n",
|
| 60 |
"from pydantic.warnings import PydanticDeprecatedSince20, PydanticDeprecatedSince211\n",
|
| 61 |
+
"\n",
|
| 62 |
"# Load environment variables from .env file\n",
|
| 63 |
"load_dotenv()\n",
|
| 64 |
"# Disable pydantic deprecation warnings\n",
|
|
|
|
| 68 |
},
|
| 69 |
{
|
| 70 |
"cell_type": "code",
|
| 71 |
+
"execution_count": 4,
|
| 72 |
"id": "a4bfbbc9",
|
| 73 |
"metadata": {},
|
| 74 |
"outputs": [],
|
|
|
|
| 96 |
" raise Exception(f\"Failed to fetch task file for task_id {task_id}: {response.text}\")\n",
|
| 97 |
" \n",
|
| 98 |
" @staticmethod\n",
|
| 99 |
+
" def submit_agent_answers(username: str,agent_code: str,answers: List[Dict[str, str]]) -> Optional[Dict]:\n",
|
| 100 |
+
" \"\"\"answers keys are task_id and submitted_answer\"\"\"\n",
|
| 101 |
+
" url = f\"{HFAgentsCourseAPI.API_URL}/submit\"\n",
|
| 102 |
+
" payload = {\"username\": username,\"agent_code\": agent_code,\"answers\": answers}\n",
|
| 103 |
+
" headers = {\"Content-Type\": \"application/json\"}\n",
|
| 104 |
+
" try:\n",
|
| 105 |
+
" response = requests.post(url,json.dumps(payload),headers=headers,timeout=30)\n",
|
| 106 |
+
" response.raise_for_status()\n",
|
| 107 |
+
" return response.json()\n",
|
| 108 |
+
" except requests.exceptions.RequestException as e:\n",
|
| 109 |
+
" print(f\"Erreur lors de la requête: {e}\")\n",
|
| 110 |
+
" return None\n",
|
| 111 |
+
" except json.JSONDecodeError as e:\n",
|
| 112 |
+
" print(f\"Erreur lors du décodage JSON: {e}\")\n",
|
| 113 |
+
" return None"
|
| 114 |
]
|
| 115 |
},
|
| 116 |
{
|
| 117 |
"cell_type": "code",
|
| 118 |
+
"execution_count": 11,
|
| 119 |
"id": "ab28e8d7",
|
| 120 |
"metadata": {},
|
| 121 |
"outputs": [],
|
| 122 |
"source": [
|
| 123 |
"# data loading and downloading (if not)\n",
|
| 124 |
+
"filepath = \"/home/laiking/code/learning/agents-course-pj/data/gaia-tasks.json\"\n",
|
|
|
|
|
|
|
| 125 |
"# download questions\n",
|
| 126 |
+
"if not os.path.exists(filepath):\n",
|
| 127 |
" data = HFAgentsCourseAPI.get_all_questions()\n",
|
| 128 |
+
" with open(filepath, \"w\") as f:\n",
|
| 129 |
" json.dump(data, f, indent=4)\n",
|
| 130 |
"else:\n",
|
| 131 |
+
" with open(filepath, \"r\") as f:\n",
|
| 132 |
+
" data = json.load(f)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
]
|
| 134 |
},
|
| 135 |
{
|
| 136 |
"cell_type": "code",
|
| 137 |
+
"execution_count": 6,
|
| 138 |
"id": "94f848b8",
|
| 139 |
"metadata": {},
|
| 140 |
"outputs": [],
|
|
|
|
| 171 |
},
|
| 172 |
{
|
| 173 |
"cell_type": "code",
|
| 174 |
+
"execution_count": 7,
|
| 175 |
"id": "2baeb38b",
|
| 176 |
"metadata": {},
|
| 177 |
"outputs": [],
|
| 178 |
"source": [
|
| 179 |
+
"# Tools\n",
|
| 180 |
"\n",
|
| 181 |
"def get_page(page_query:str):\n",
|
| 182 |
" \"\"\"Send a query to wikipedia and return the text of the page found if it is found, else return an empty string.\"\"\"\n",
|
|
|
|
| 190 |
" \"\"\"Reverse a string.\"\"\"\n",
|
| 191 |
" return s[::-1]\n",
|
| 192 |
"\n",
|
| 193 |
+
"def get_youtube_transcript(video_url: str) -> str:\n",
|
| 194 |
+
" \"\"\"Get the transcript/subtitles of a YouTube video.\"\"\"\n",
|
| 195 |
+
" try:\n",
|
| 196 |
+
" # Extract video ID from URL\n",
|
| 197 |
+
" parsed_url = urlparse(video_url)\n",
|
| 198 |
+
" if parsed_url.hostname == 'youtu.be':\n",
|
| 199 |
+
" video_id = parsed_url.path[1:]\n",
|
| 200 |
+
" elif parsed_url.hostname in ('www.youtube.com', 'youtube.com'):\n",
|
| 201 |
+
" if 'watch' in parsed_url.path:\n",
|
| 202 |
+
" video_id = parse_qs(parsed_url.query)['v'][0]\n",
|
| 203 |
+
" elif 'embed' in parsed_url.path:\n",
|
| 204 |
+
" video_id = parsed_url.path.split('/')[-1]\n",
|
| 205 |
+
" else:\n",
|
| 206 |
+
" return \"Invalid YouTube URL\"\n",
|
| 207 |
+
" \n",
|
| 208 |
+
" # Get transcript\n",
|
| 209 |
+
" transcript_list = YouTubeTranscriptApi.get_transcript(video_id)\n",
|
| 210 |
+
" transcript_text = \" \".join([item['text'] for item in transcript_list])\n",
|
| 211 |
+
" return transcript_text\n",
|
| 212 |
+
" except Exception as e:\n",
|
| 213 |
+
" return f\"Error getting transcript: {str(e)}\"\n",
|
| 214 |
+
"\n",
|
| 215 |
+
"def execute_python_code(code: str) -> str:\n",
|
| 216 |
+
" \"\"\"Execute Python code safely and return the output.\"\"\"\n",
|
| 217 |
+
" try:\n",
|
| 218 |
+
" # Create a temporary file\n",
|
| 219 |
+
" with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f:\n",
|
| 220 |
+
" f.write(code)\n",
|
| 221 |
+
" temp_file = f.name\n",
|
| 222 |
+
" # Execute the code\n",
|
| 223 |
+
" result = subprocess.run(['python', temp_file], \n",
|
| 224 |
+
" capture_output=True, text=True, timeout=30)\n",
|
| 225 |
+
" # Clean up\n",
|
| 226 |
+
" os.unlink(temp_file)\n",
|
| 227 |
+
" if result.returncode == 0:\n",
|
| 228 |
+
" return f\"Output: {result.stdout}\"\n",
|
| 229 |
+
" else:\n",
|
| 230 |
+
" return f\"Error: {result.stderr}\"\n",
|
| 231 |
+
" except Exception as e:\n",
|
| 232 |
+
" return f\"Execution error: {str(e)}\"\n",
|
| 233 |
+
"\n",
|
| 234 |
+
"def read_excel_file(file_path: str) -> str:\n",
|
| 235 |
+
" \"\"\"Read an Excel file and return its content as text.\"\"\"\n",
|
| 236 |
+
" try:\n",
|
| 237 |
+
" # Read all sheets\n",
|
| 238 |
+
" excel_file = pd.ExcelFile(file_path)\n",
|
| 239 |
+
" result = []\n",
|
| 240 |
+
" \n",
|
| 241 |
+
" for sheet_name in excel_file.sheet_names:\n",
|
| 242 |
+
" df = pd.read_excel(file_path, sheet_name=sheet_name)\n",
|
| 243 |
+
" result.append(f\"Sheet: {sheet_name}\")\n",
|
| 244 |
+
" result.append(df.to_string())\n",
|
| 245 |
+
" result.append(\"\\n\" + \"=\"*50 + \"\\n\")\n",
|
| 246 |
+
" \n",
|
| 247 |
+
" return \"\\n\".join(result)\n",
|
| 248 |
+
" except Exception as e:\n",
|
| 249 |
+
" return f\"Error reading Excel file: {str(e)}\"\n",
|
| 250 |
"\n",
|
| 251 |
+
"def get_task_file_content(task_id: str) -> str:\n",
|
| 252 |
+
" \"\"\"Get the content of a task file using the HF API.\"\"\"\n",
|
| 253 |
+
" try:\n",
|
| 254 |
+
" file_content = HFAgentsCourseAPI.get_task_file(task_id)\n",
|
| 255 |
+
" # For text files, decode and return content\n",
|
| 256 |
+
" if isinstance(file_content, bytes):\n",
|
| 257 |
+
" try:\n",
|
| 258 |
+
" return file_content.decode('utf-8')\n",
|
| 259 |
+
" except UnicodeDecodeError:\n",
|
| 260 |
+
" return f\"Binary file content (size: {len(file_content)} bytes)\"\n",
|
| 261 |
+
" return str(file_content)\n",
|
| 262 |
+
" except Exception as e:\n",
|
| 263 |
+
" return f\"Error getting task file: {str(e)}\"\n",
|
| 264 |
+
"\n",
|
| 265 |
+
"def search_wikipedia_articles(query: str, limit: int = 5) -> str:\n",
|
| 266 |
+
" \"\"\"Search for Wikipedia articles and return summaries.\"\"\"\n",
|
| 267 |
+
" try:\n",
|
| 268 |
+
" site = mwclient.Site('en.wikipedia.org')\n",
|
| 269 |
+
" search_results = list(site.search(query, limit=limit))\n",
|
| 270 |
+
" \n",
|
| 271 |
+
" results = []\n",
|
| 272 |
+
" for page_info in search_results:\n",
|
| 273 |
+
" page = site.pages[page_info['title']]\n",
|
| 274 |
+
" if page.exists:\n",
|
| 275 |
+
" # Get first paragraph as summary\n",
|
| 276 |
+
" content = page.text()\n",
|
| 277 |
+
" first_para = content.split('\\n\\n')[0] if content else \"No content\"\n",
|
| 278 |
+
" results.append(f\"Title: {page_info['title']}\\nSummary: {first_para[:500]}...\")\n",
|
| 279 |
+
" \n",
|
| 280 |
+
" return \"\\n\\n\".join(results) if results else \"No results found\"\n",
|
| 281 |
+
" except Exception as e:\n",
|
| 282 |
+
" return f\"Error searching Wikipedia: {str(e)}\"\n",
|
| 283 |
+
"\n",
|
| 284 |
+
"# Create tools\n",
|
| 285 |
"wiki_page_tool = FunctionTool.from_defaults(\n",
|
| 286 |
" get_page,\n",
|
| 287 |
" name=\"WikipediaTool\",\n",
|
|
|
|
| 292 |
" reverse_string,\n",
|
| 293 |
" name=\"ReverseStringTool\",\n",
|
| 294 |
" description=\"Reverse a string and return it.\",\n",
|
| 295 |
+
")\n",
|
| 296 |
+
"\n",
|
| 297 |
+
"youtube_transcript_tool = FunctionTool.from_defaults(\n",
|
| 298 |
+
" get_youtube_transcript,\n",
|
| 299 |
+
" name=\"YouTubeTranscriptTool\",\n",
|
| 300 |
+
" description=\"Get the transcript/subtitles of a YouTube video. Provide the full YouTube URL.\",\n",
|
| 301 |
+
")\n",
|
| 302 |
+
"\n",
|
| 303 |
+
"python_executor_tool = FunctionTool.from_defaults(\n",
|
| 304 |
+
" execute_python_code,\n",
|
| 305 |
+
" name=\"PythonExecutorTool\",\n",
|
| 306 |
+
" description=\"Execute Python code and return the output. Use this for running Python scripts or code analysis.\",\n",
|
| 307 |
+
")\n",
|
| 308 |
+
"\n",
|
| 309 |
+
"excel_reader_tool = FunctionTool.from_defaults(\n",
|
| 310 |
+
" read_excel_file,\n",
|
| 311 |
+
" name=\"ExcelReaderTool\",\n",
|
| 312 |
+
" description=\"Read an Excel file and return its content. Provide the full file path.\",\n",
|
| 313 |
+
")\n",
|
| 314 |
+
"\n",
|
| 315 |
+
"task_file_tool = FunctionTool.from_defaults(\n",
|
| 316 |
+
" get_task_file_content,\n",
|
| 317 |
+
" name=\"TaskFileTool\",\n",
|
| 318 |
+
" description=\"Get the content of a task file using the task ID.\",\n",
|
| 319 |
+
")\n",
|
| 320 |
+
"\n",
|
| 321 |
+
"wiki_search_tool = FunctionTool.from_defaults(\n",
|
| 322 |
+
" search_wikipedia_articles,\n",
|
| 323 |
+
" name=\"WikipediaSearchTool\",\n",
|
| 324 |
+
" description=\"Search for Wikipedia articles and return summaries. Useful for finding relevant articles.\",\n",
|
| 325 |
")\n",
|
| 326 |
"\n",
|
| 327 |
"tavily_tools = TavilyToolSpec(\n",
|
|
|
|
| 334 |
" description=\"A tool for reading web pages. Provide a URL to read the content of the page.\",\n",
|
| 335 |
")\n",
|
| 336 |
"\n",
|
| 337 |
+
"# Enhanced tool list\n",
|
| 338 |
"tools = [\n",
|
| 339 |
" wiki_page_tool,\n",
|
| 340 |
+
" wiki_search_tool,\n",
|
| 341 |
" reverse_string_tool,\n",
|
| 342 |
+
" youtube_transcript_tool,\n",
|
| 343 |
+
" python_executor_tool,\n",
|
| 344 |
+
" excel_reader_tool,\n",
|
| 345 |
+
" task_file_tool,\n",
|
| 346 |
" web_page_reader_tool,\n",
|
| 347 |
"] + tavily_tools"
|
| 348 |
]
|
| 349 |
},
|
| 350 |
+
{
|
| 351 |
+
"cell_type": "markdown",
|
| 352 |
+
"id": "87726303",
|
| 353 |
+
"metadata": {},
|
| 354 |
+
"source": [
|
| 355 |
+
"## Multi-Agent Workflow with Multimodal Inputs\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"In this workflow, we have:\n",
|
| 358 |
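+
"1. **CoordinatorAgent**: Main (root) agent that handles general text-only tasks and delegates to the specialists\n",
|
| 359 |
+
"2. **EnhancedMultimodalAgent**: Specialized agent that can process images, audio, and video\n",
|
| 360 |
+
"3. **EnhancedReasoningAgent**: Agent for complex reasoning, mathematics, and logical analysis\n",
|
| 361 |
+
"\n",
|
| 362 |
+
"The CoordinatorAgent detects whether the input contains multimodal elements and automatically delegates to the appropriate agent. The workflow also registers ResearchAgent, CodingAgent, and YouTubeAgent as additional specialists."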
|
| 363 |
+
]
|
| 364 |
+
},
|
| 365 |
{
|
| 366 |
"cell_type": "code",
|
| 367 |
+
"execution_count": 8,
|
| 368 |
+
"id": "96f7fa7a",
|
| 369 |
"metadata": {},
|
| 370 |
"outputs": [],
|
| 371 |
"source": [
|
| 372 |
+
"def has_multimodal_content(message: ChatMessage) -> bool:\n",
|
| 373 |
+
" \"\"\"Detects if a message contains multimodal content (images, audio, video)\"\"\"\n",
|
| 374 |
+
" if hasattr(message, 'content'):\n",
|
| 375 |
+
" # If content is a list of blocks\n",
|
| 376 |
+
" if isinstance(message.content, list):\n",
|
| 377 |
+
" for block in message.content:\n",
|
| 378 |
+
" if isinstance(block, (ImageBlock, AudioBlock)):\n",
|
| 379 |
+
" return True\n",
|
| 380 |
+
" # If content is a single block\n",
|
| 381 |
+
" elif isinstance(message.content, (ImageBlock, AudioBlock)):\n",
|
| 382 |
+
" return True\n",
|
| 383 |
+
" return False\n",
|
| 384 |
"\n",
|
| 385 |
+
"def extract_text_from_message(message: ChatMessage) -> str:\n",
|
| 386 |
+
" \"\"\"Extracts only the text from a multimodal message\"\"\"\n",
|
| 387 |
+
" text_parts = []\n",
|
| 388 |
+
" if hasattr(message, 'content'):\n",
|
| 389 |
+
" if isinstance(message.content, list):\n",
|
| 390 |
+
" for block in message.content:\n",
|
| 391 |
+
" if isinstance(block, TextBlock):\n",
|
| 392 |
+
" text_parts.append(block.text)\n",
|
| 393 |
+
" elif isinstance(message.content, TextBlock):\n",
|
| 394 |
+
" text_parts.append(message.content.text)\n",
|
| 395 |
+
" elif isinstance(message.content, str):\n",
|
| 396 |
+
" text_parts.append(message.content)\n",
|
| 397 |
+
" return \"\\n\".join(text_parts)"
|
| 398 |
+
]
|
| 399 |
+
},
|
| 400 |
+
{
|
| 401 |
+
"cell_type": "code",
|
| 402 |
+
"execution_count": 9,
|
| 403 |
+
"id": "0ee22893",
|
| 404 |
+
"metadata": {},
|
| 405 |
+
"outputs": [],
|
| 406 |
+
"source": [
|
| 407 |
+
"# Configuration of agents for multimodal workflow\n",
|
| 408 |
+
"\n",
|
| 409 |
+
"# Specialized system prompts for different task types\n",
|
| 410 |
+
"GAIA_PROMPT = \"You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.\"\n",
|
| 411 |
+
"MULTIMODAL_PROMPT = \"You are a multimodal specialist. You can process images, audio, and video content. For chess positions, analyze the board carefully. For audio, transcribe and analyze the content. Be precise in your analysis.\"\n",
|
| 412 |
+
"MATH_PROMPT = \"You are a mathematical reasoning specialist. You excel at logical analysis, mathematical operations, and structured problem solving. Break down complex problems step by step.\"\n",
|
| 413 |
+
"\n",
|
| 414 |
+
"# Main coordinator agent (text only, with all tools)\n",
|
| 415 |
+
"coordinator_agent = ReActAgent(\n",
|
| 416 |
+
" name=\"CoordinatorAgent\",\n",
|
| 417 |
+
" description=\"Main coordinator that handles general tasks and delegates to specialists\",\n",
|
| 418 |
" llm=nemotron_super,\n",
|
| 419 |
" tools=tools,\n",
|
| 420 |
" system_prompt=\"detailed thinking off\",\n",
|
| 421 |
")\n",
|
| 422 |
"\n",
|
| 423 |
+
"# Research specialist agent\n",
|
| 424 |
+
"research_agent = ReActAgent(\n",
|
| 425 |
+
" name=\"ResearchAgent\", \n",
|
| 426 |
+
" description=\"Specialist for research tasks using Wikipedia and web search\",\n",
|
| 427 |
+
" llm=nemotron_super,\n",
|
| 428 |
+
" tools=[wiki_page_tool, wiki_search_tool, web_page_reader_tool] + tavily_tools,\n",
|
| 429 |
+
" system_prompt=\"detailed thinking off\",\n",
|
| 430 |
")\n",
|
| 431 |
"\n",
|
| 432 |
+
"# Coding specialist agent\n",
|
| 433 |
+
"coding_agent = ReActAgent(\n",
|
| 434 |
+
" name=\"CodingAgent\",\n",
|
| 435 |
+
" description=\"Specialist for code execution and file analysis\",\n",
|
| 436 |
" llm=nemotron_super,\n",
|
| 437 |
+
" tools=[python_executor_tool, excel_reader_tool, task_file_tool],\n",
|
| 438 |
+
" system_prompt=\"detailed thinking off\",\n",
|
| 439 |
+
")\n",
|
| 440 |
+
"\n",
|
| 441 |
+
"# Multimodal agent (can process images, audio, text)\n",
|
| 442 |
+
"enhanced_multimodal_agent = FunctionAgent(\n",
|
| 443 |
+
" name=\"EnhancedMultimodalAgent\",\n",
|
| 444 |
+
" description=\"Agent specialized for multimodal content (images, audio, video)\",\n",
|
| 445 |
+
" llm=gemini_2_5_flash, # Gemini has good multimodal support\n",
|
| 446 |
+
" system_prompt=MULTIMODAL_PROMPT,\n",
|
| 447 |
+
")\n",
|
| 448 |
+
"\n",
|
| 449 |
+
"# Enhanced reasoning agent for mathematics and logic\n",
|
| 450 |
+
"enhanced_reasoning_agent = FunctionAgent(\n",
|
| 451 |
+
" name=\"EnhancedReasoningAgent\", \n",
|
| 452 |
+
" description=\"Specialist for complex reasoning, mathematics, and logical analysis\",\n",
|
| 453 |
+
" llm=magistral_sm,\n",
|
| 454 |
+
" system_prompt=MATH_PROMPT,\n",
|
| 455 |
+
")\n",
|
| 456 |
+
"\n",
|
| 457 |
+
"# YouTube specialist agent\n",
|
| 458 |
+
"youtube_agent = ReActAgent(\n",
|
| 459 |
+
" name=\"YouTubeAgent\",\n",
|
| 460 |
+
" description=\"Specialist for YouTube video analysis and transcript processing\",\n",
|
| 461 |
+
" llm=nemotron_super,\n",
|
| 462 |
+
" tools=[youtube_transcript_tool, web_page_reader_tool],\n",
|
| 463 |
+
" system_prompt=\"You specialize in analyzing YouTube videos. Use transcript tools to get video content and analyze it carefully.\",\n",
|
| 464 |
+
")\n",
|
| 465 |
+
"\n",
|
| 466 |
+
"multi_agent_workflow = AgentWorkflow(\n",
|
| 467 |
+
" agents=[coordinator_agent, research_agent, coding_agent, enhanced_multimodal_agent, enhanced_reasoning_agent, youtube_agent],\n",
|
| 468 |
+
" root_agent=coordinator_agent.name,\n",
|
| 469 |
")"
|
| 470 |
]
|
| 471 |
},
|
| 472 |
{
|
| 473 |
+
"cell_type": "code",
|
| 474 |
+
"execution_count": 7,
|
| 475 |
+
"id": "f6b20562",
|
| 476 |
"metadata": {},
|
| 477 |
+
"outputs": [],
|
| 478 |
"source": [
|
| 479 |
+
"# Final optimized processing with both enhanced agents and quick solvers\n",
|
| 480 |
+
"async def process_gaia_optimized():\n",
|
| 481 |
+
" \"\"\"Process GAIA examples with optimized approach combining quick solvers and enhanced agents\"\"\"\n",
|
| 482 |
+
" \n",
|
| 483 |
+
" answers = []\n",
|
| 484 |
+
" \n",
|
| 485 |
+
" for i, example in enumerate(data):\n",
|
| 486 |
+
" print(f\"\\n=== Processing example {i+1}/{len(data)} ===\")\n",
|
| 487 |
+
" print(f\"Task ID: {example['task_id']}\")\n",
|
| 488 |
+
" print(f\"Question: {example['question'][:100]}...\")\n",
|
| 489 |
+
" base_prompt = f\"{GAIA_PROMPT}\\nQuestion: {example['question']}\"\n",
|
| 490 |
+
" try:\n",
|
| 491 |
+
" \n",
|
| 492 |
+
" # For remaining tasks, use enhanced agent system\n",
|
| 493 |
+
" if example[\"file_name\"]:\n",
|
| 494 |
+
" file_path = f\"/home/laiking/code/learning/agents-course-pj/data/{example['file_name']}\"\n",
|
| 495 |
+
" print(f\"File: {example['file_name']}\")\n",
|
| 496 |
+
" \n",
|
| 497 |
+
" if example[\"file_name\"].endswith(\".mp3\"):\n",
|
| 498 |
+
" message = ChatMessage(\n",
|
| 499 |
+
" role=\"user\",\n",
|
| 500 |
+
" content=[\n",
|
| 501 |
+
" TextBlock(text=base_prompt),\n",
|
| 502 |
+
" AudioBlock(path=file_path)\n",
|
| 503 |
+
" ]\n",
|
| 504 |
+
" )\n",
|
| 505 |
+
" elif example[\"file_name\"].endswith((\".png\", \".jpg\", \".jpeg\")):\n",
|
| 506 |
+
" message = ChatMessage(\n",
|
| 507 |
+
" role=\"user\",\n",
|
| 508 |
+
" content=[\n",
|
| 509 |
+
" TextBlock(text=base_prompt),\n",
|
| 510 |
+
" ImageBlock(path=file_path)\n",
|
| 511 |
+
" ]\n",
|
| 512 |
+
" )\n",
|
| 513 |
+
" elif example[\"file_name\"].endswith(\".py\"):\n",
|
| 514 |
+
" # For Python files, read and execute\n",
|
| 515 |
+
" try:\n",
|
| 516 |
+
" with open(file_path, 'r') as f:\n",
|
| 517 |
+
" code_content = f.read()\n",
|
| 518 |
+
" \n",
|
| 519 |
+
" # Execute the code and get output\n",
|
| 520 |
+
" exec_result = execute_python_code(code_content)\n",
|
| 521 |
+
" \n",
|
| 522 |
+
" message = ChatMessage(\n",
|
| 523 |
+
" role=\"user\",\n",
|
| 524 |
+
" content=f\"{base_prompt}\\n\\nCode execution result: ```\\n{exec_result}\\n```\"\n",
|
| 525 |
+
" )\n",
|
| 526 |
+
" except Exception as e:\n",
|
| 527 |
+
" message = ChatMessage(\n",
|
| 528 |
+
" role=\"user\",\n",
|
| 529 |
+
" content=f\"{base_prompt}\\nError executing code: {e}\"\n",
|
| 530 |
+
" )\n",
|
| 531 |
+
" elif example[\"file_name\"].endswith(\".xlsx\"):\n",
|
| 532 |
+
" # For Excel files, read the content\n",
|
| 533 |
+
" try:\n",
|
| 534 |
+
" excel_content = read_excel_file(file_path)\n",
|
| 535 |
+
" message = ChatMessage(\n",
|
| 536 |
+
" role=\"user\",\n",
|
| 537 |
+
" content=f\"{base_prompt}\\n\\nExcel file content:\\n{excel_content}\"\n",
|
| 538 |
+
" )\n",
|
| 539 |
+
" except Exception as e:\n",
|
| 540 |
+
" message = ChatMessage(\n",
|
| 541 |
+
" role=\"user\",\n",
|
| 542 |
+
" content=f\"{base_prompt}\\nError reading Excel: {e}\"\n",
|
| 543 |
+
" )\n",
|
| 544 |
+
" else:\n",
|
| 545 |
+
" message = ChatMessage(\n",
|
| 546 |
+
" role=\"user\",\n",
|
| 547 |
+
" content=f\"{base_prompt}\\nNote: A file {example['file_name']} is associated with this question.\"\n",
|
| 548 |
+
" )\n",
|
| 549 |
+
" else:\n",
|
| 550 |
+
" message = ChatMessage(\n",
|
| 551 |
+
" role=\"user\",\n",
|
| 552 |
+
" content=base_prompt\n",
|
| 553 |
+
" )\n",
|
| 554 |
+
" \n",
|
| 555 |
+
" # Process with enhanced intelligent manager\n",
|
| 556 |
+
" result = await multi_agent_workflow.run(user_msg=message)\n",
|
| 557 |
+
" \n",
|
| 558 |
+
" # Extract final answer with improved pattern matching\n",
|
| 559 |
+
" patterns = [\n",
|
| 560 |
+
" r'final\\s+answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
|
| 561 |
+
" r'answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
|
| 562 |
+
" r'the\\s+answer\\s+is\\s*:?\\s*(.*?)(?:\\n|$)',\n",
|
| 563 |
+
" r'result\\s*:\\s*(.*?)(?:\\n|$)',\n",
|
| 564 |
+
" r'output\\s*:\\s*(.*?)(?:\\n|$)'\n",
|
| 565 |
+
" ]\n",
|
| 566 |
+
" \n",
|
| 567 |
+
" result_text = result.response.blocks[-1].text if hasattr(result.response, 'blocks') else str(result)\n",
|
| 568 |
+
" final_answer = \"No final answer found\"\n",
|
| 569 |
+
" \n",
|
| 570 |
+
" for pattern in patterns:\n",
|
| 571 |
+
" match = re.search(pattern, result_text, re.IGNORECASE | re.DOTALL)\n",
|
| 572 |
+
" if match:\n",
|
| 573 |
+
" final_answer = match.group(1).strip()\n",
|
| 574 |
+
" # Clean up the answer\n",
|
| 575 |
+
" final_answer = final_answer.split('\\n')[0] # Take only the first line\n",
|
| 576 |
+
" final_answer = re.sub(r'^[\"\\']|[\"\\']$', '', final_answer) # Remove quotes\n",
|
| 577 |
+
" break\n",
|
| 578 |
+
" \n",
|
| 579 |
+
" # If still no answer found, try to extract numbers or key words from the result\n",
|
| 580 |
+
" if final_answer == \"No final answer found\":\n",
|
| 581 |
+
" # Look for numbers in the result\n",
|
| 582 |
+
" numbers = re.findall(r'\\b\\d+(?:\\.\\d+)?\\b', result_text)\n",
|
| 583 |
+
" if numbers:\n",
|
| 584 |
+
" final_answer = numbers[-1] # Take the last number found\n",
|
| 585 |
+
" else:\n",
|
| 586 |
+
" # Look for key words\n",
|
| 587 |
+
" words = re.findall(r'\\b[a-zA-Z]+\\b', result_text)\n",
|
| 588 |
+
" if words:\n",
|
| 589 |
+
" # Take the last meaningful word (not common words)\n",
|
| 590 |
+
" common_words = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for', 'of', 'with', 'by', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might', 'can', 'must'}\n",
|
| 591 |
+
" meaningful_words = [w for w in words if w.lower() not in common_words]\n",
|
| 592 |
+
" if meaningful_words:\n",
|
| 593 |
+
" final_answer = meaningful_words[-1]\n",
|
| 594 |
+
" \n",
|
| 595 |
+
" answers.append({\n",
|
| 596 |
+
" \"task_id\": example[\"task_id\"],\n",
|
| 597 |
+
" \"submitted_answer\": final_answer\n",
|
| 598 |
+
" })\n",
|
| 599 |
+
" \n",
|
| 600 |
+
" print(f\"✅ Answer: {final_answer}\")\n",
|
| 601 |
+
" \n",
|
| 602 |
+
" except Exception as e:\n",
|
| 603 |
+
" print(f\"❌ Error: {e}\")\n",
|
| 604 |
+
" answers.append({\n",
|
| 605 |
+
" \"task_id\": example[\"task_id\"],\n",
|
| 606 |
+
" \"submitted_answer\": \"Error occurred\"\n",
|
| 607 |
+
" })\n",
|
| 608 |
+
" \n",
|
| 609 |
+
" return answers"
|
| 610 |
]
|
| 611 |
},
|
| 612 |
{
|
| 613 |
"cell_type": "code",
|
| 614 |
+
"execution_count": null,
|
| 615 |
+
"id": "53a651b4",
|
| 616 |
"metadata": {},
|
| 617 |
"outputs": [
|
| 618 |
{
|
| 619 |
"name": "stdout",
|
| 620 |
"output_type": "stream",
|
| 621 |
"text": [
|
| 622 |
+
"🚀 Starting optimized GAIA processing...\n",
|
| 623 |
+
"\n",
|
| 624 |
+
"=== Processing example 1/20 ===\n",
|
| 625 |
+
"Task ID: 8e867cd7-cff9-4e6c-867a-ff5ddc2550be\n",
|
| 626 |
+
"Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use...\n",
|
| 627 |
+
"✅ Answer: 3\n",
|
| 628 |
+
"\n",
|
| 629 |
+
"=== Processing example 2/20 ===\n",
|
| 630 |
+
"Task ID: a1e91b78-d3d8-4675-bb8d-62741b4b68a6\n",
|
| 631 |
+
"Question: In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species...\n"
|
| 632 |
+
]
|
| 633 |
+
},
|
| 634 |
+
{
|
| 635 |
+
"name": "stderr",
|
| 636 |
+
"output_type": "stream",
|
| 637 |
+
"text": [
|
| 638 |
+
"/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/youtube_transcript_api/_api.py:273: DeprecationWarning: `get_transcript` is deprecated and will be removed in a future version. Use the `fetch` method instead!\n",
|
| 639 |
+
" warnings.warn(\n",
|
| 640 |
+
"/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/youtube_transcript_api/_api.py:170: DeprecationWarning: `list_transcripts` is deprecated and will be removed in a future version. Use the `list` method instead!\n",
|
| 641 |
+
" warnings.warn(\n"
|
| 642 |
+
]
|
| 643 |
+
},
|
| 644 |
+
{
|
| 645 |
+
"name": "stdout",
|
| 646 |
+
"output_type": "stream",
|
| 647 |
+
"text": [
|
| 648 |
+
"❌ Error: Error in step 'run_agent_step': Internal Server Error\n",
|
| 649 |
+
"\n",
|
| 650 |
+
"=== Processing example 3/20 ===\n",
|
| 651 |
+
"Task ID: 2d83110e-a098-4ebb-9987-066c06fa42d0\n",
|
| 652 |
+
"Question: .rewsna eht sa \"tfel\" drow eht fo etisoppo eht etirw ,ecnetnes siht dnatsrednu uoy fI...\n",
|
| 653 |
+
"✅ Answer: right\n",
|
| 654 |
+
"\n",
|
| 655 |
+
"=== Processing example 4/20 ===\n",
|
| 656 |
+
"Task ID: cca530fc-4052-43b2-b130-b30968d8aa44\n",
|
| 657 |
+
"Question: Review the chess position provided in the image. It is black's turn. Provide the correct next move f...\n",
|
| 658 |
+
"File: cca530fc-4052-43b2-b130-b30968d8aa44.png\n",
|
| 659 |
+
"❌ Error: Error in step 'run_agent_step': Internal Server Error\n",
|
| 660 |
+
"\n",
|
| 661 |
+
"=== Processing example 5/20 ===\n",
|
| 662 |
+
"Task ID: 4fc2f1ae-8625-45b5-ab34-ad4433bc21f8\n",
|
| 663 |
+
"Question: Who nominated the only Featured Article on English Wikipedia about a dinosaur that was promoted in N...\n"
|
| 664 |
+
]
|
| 665 |
+
},
|
| 666 |
+
{
|
| 667 |
+
"name": "stderr",
|
| 668 |
+
"output_type": "stream",
|
| 669 |
+
"text": [
|
| 670 |
+
"/home/laiking/code/learning/agents-course-pj/.venv/lib/python3.12/site-packages/mwclient/util.py:44: DeprecationWarning: limit is deprecated as its name and purpose are confusing. use api_chunk_size to set the number of items retrieved from the API at once, and/or max_items to limit the total number of items that will be yielded\n",
|
| 671 |
+
" warnings.warn(\n"
|
| 672 |
]
|
| 673 |
}
|
| 674 |
],
|
| 675 |
"source": [
|
| 676 |
+
"# Test the optimized system on GAIA benchmark\n",
|
| 677 |
+
"print(\"🚀 Starting optimized GAIA processing...\")\n",
|
| 678 |
+
"\n",
|
| 679 |
+
"# Run the optimized processing\n",
|
| 680 |
+
"answers_optimized = await process_gaia_optimized()\n",
|
| 681 |
+
"\n",
|
| 682 |
+
"print(f\"\\n✅ Completed processing {len(answers_optimized)} tasks\")\n",
|
| 683 |
+
"\n",
|
| 684 |
+
"# Display results summary\n",
|
| 685 |
+
"successful_answers = [a for a in answers_optimized if a['submitted_answer'] != \"Error occurred\" and a['submitted_answer'] != \"No final answer found\"]\n",
|
| 686 |
+
"print(f\"📊 Successfully processed: {len(successful_answers)}/{len(answers_optimized)} tasks ({len(successful_answers)/len(answers_optimized)*100:.1f}%)\")\n",
|
| 687 |
+
"\n",
|
| 688 |
+
"# Show some sample answers\n",
|
| 689 |
+
"print(\"\\n🔍 Sample answers:\")\n",
|
| 690 |
+
"for i, answer in enumerate(answers_optimized[:10]):\n",
|
| 691 |
+
" task = next((t for t in data if t['task_id'] == answer['task_id']), {})\n",
|
| 692 |
+
" question_preview = task.get('question', '')[:50] + '...' if len(task.get('question', '')) > 50 else task.get('question', '')\n",
|
| 693 |
+
" print(f\"{i+1}. {question_preview}\")\n",
|
| 694 |
+
" print(f\" Answer: {answer['submitted_answer']}\")\n",
|
| 695 |
+
" print()\n",
|
| 696 |
+
"\n",
|
| 697 |
+
"# Save results for potential submission\n",
|
| 698 |
+
"results_file = \"/home/laiking/code/learning/agents-course-pj/gaia_results.json\"\n",
|
| 699 |
+
"with open(results_file, 'w') as f:\n",
|
| 700 |
+
" json.dump(answers_optimized, f, indent=2)\n",
|
| 701 |
+
"\n",
|
| 702 |
+
"print(f\"💾 Results saved to: {results_file}\")"
|
| 703 |
]
|
| 704 |
},
|
| 705 |
{
|
| 706 |
+
"cell_type": "code",
|
| 707 |
+
"execution_count": null,
|
| 708 |
+
"id": "102667ca",
|
| 709 |
"metadata": {},
|
| 710 |
+
"outputs": [
|
| 711 |
+
{
|
| 712 |
+
"name": "stdout",
|
| 713 |
+
"output_type": "stream",
|
| 714 |
+
"text": [
|
| 715 |
+
"✅ Submission successful!\n",
|
| 716 |
+
"Response: {'username': 'laiking', 'score': 30.0, 'correct_count': 6, 'total_attempted': 10, 'message': 'Score calculated successfully: 6/20 total questions answered correctly (10 valid tasks attempted). (1 submitted answers had invalid or duplicate task IDs). High score updated on leaderboard.', 'timestamp': '2025-06-26T10:20:32.410900+00:00'}\n"
|
| 717 |
+
]
|
| 718 |
+
}
|
| 719 |
+
],
|
| 720 |
"source": [
|
| 721 |
+
"submit = {\n",
|
| 722 |
+
" \"username\": \"laiking\",\n",
|
| 723 |
+
" \"agent_code\": \"https://huggingface.co/spaces/agents-course/tree/main\",\n",
|
| 724 |
+
" \"answers\": [\n",
|
| 725 |
+
" {\"task_id\": \"8e867cd7-cff9-4e6c-867a-ff5ddc2550be\", \"submitted_answer\": 3},\n",
|
| 726 |
+
" {\"task_id\": \"a1e91b78-d3d8-4675-bb8d-62741b4b68a6\", \"submitted_answer\": 30},\n",
|
| 727 |
+
" {\"task_id\": \"2d83110e-a098-4ebb-9987-066c06fa42d0\", \"submitted_answer\": \"right\"},\n",
|
| 728 |
+
" {\"task_id\": \"cca530fc-4052-43b2-b130-b30968d8aa44\", \"submitted_answer\": \"Qxc3\"},\n",
|
| 729 |
+
" {\"task_id\": \"4fc2f1ae-8625-45b5-ab34-ad4433bc21f8\", \"submitted_answer\": \"funkmonk\"},\n",
|
| 730 |
+
" {\"task_id\": \"6f37996b-2ac7-44b0-8e68-6d28256631b4\", \"submitted_answer\": \"b,e\"},\n",
|
| 731 |
+
"\n",
|
| 732 |
+
" {\"task_id\": \"9d191bce-651d-4746-be2d-7ef8ecadb9c2\", \"submitted_answer\": \"extremely\"},\n",
|
| 733 |
+
" {\"task_id\": \"cabe07ed-9eca-40ea-8ead-410ef5e83f91\", \"submitted_answer\": \"louvrier\"},\n",
|
| 734 |
+
" {\"task_id\": \"305ac316-eef6-4446-960a-92d80d542f8\", \"submitted_answer\": \"wojciech\"},\n",
|
| 735 |
+
" {\"task_id\": \"1f975693-876d-457b-a649-393859e79bf3\", \"submitted_answer\": \"132,133,134,197,245\"},\n",
|
| 736 |
+
" {\"task_id\": \"bda648d7-d618-4883-88f4-3466eabd860e\", \"submitted_answer\": \"Saint Petersburg\"},\n",
|
| 737 |
+
" {\"task_id\": \"bda648d7-d618-4883-88f4-3466eabd860e\", \"submitted_answer\": \"Saint Petersburg\"},\n",
|
| 738 |
+
" \n",
|
| 739 |
+
" ]\n",
|
| 740 |
+
"}\n",
|
| 741 |
+
"\n",
|
| 742 |
+
"submit_response = HFAgentsCourseAPI.submit_agent_answers(\n",
|
| 743 |
+
" username=submit[\"username\"],\n",
|
| 744 |
+
" agent_code=submit[\"agent_code\"],\n",
|
| 745 |
+
" answers=submit[\"answers\"]\n",
|
| 746 |
+
")\n",
|
| 747 |
+
"\n",
|
| 748 |
+
"if submit_response:\n",
|
| 749 |
+
" print(\"✅ Submission successful!\")\n",
|
| 750 |
+
" print(f\"Response: {submit_response}\")\n",
|
| 751 |
+
"else:\n",
|
| 752 |
+
" print(\"❌ Submission failed or no response received.\")"
|
| 753 |
]
|
| 754 |
},
|
| 755 |
{
|
| 756 |
"cell_type": "code",
|
| 757 |
+
"execution_count": 16,
|
| 758 |
+
"id": "31a6dc97",
|
| 759 |
"metadata": {},
|
| 760 |
"outputs": [
|
| 761 |
{
|
| 762 |
"name": "stdout",
|
| 763 |
"output_type": "stream",
|
| 764 |
"text": [
|
| 765 |
+
"🔍 DIAGNOSTIC: Testing the 5th example that was hanging...\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
"\n",
|
| 767 |
+
"=== DIAGNOSTIC: Processing Task 8e867cd7-cff9-4e6c-867a-ff5ddc2550be ===\n",
|
| 768 |
+
"Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use...\n",
|
| 769 |
+
"🔄 Starting workflow execution...\n",
|
| 770 |
+
"❌ ERROR: Error in step 'run_agent_step': Error code: 429 - {'error': {'message': 'Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day', 'code': 429, 'metadata': {'headers': {'X-RateLimit-Limit': '50', 'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '1750982400000'}, 'provider_name': None}}, 'user_id': 'user_2yvWFOJugA5Fg62COR8UwDYnALe'}\n",
|
| 771 |
+
"❌ ERROR: Error in step 'run_agent_step': Error code: 429 - {'error': {'message': 'Rate limit exceeded: free-models-per-day. Add 10 credits to unlock 1000 free model requests per day', 'code': 429, 'metadata': {'headers': {'X-RateLimit-Limit': '50', 'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '1750982400000'}, 'provider_name': None}}, 'user_id': 'user_2yvWFOJugA5Fg62COR8UwDYnALe'}\n"
|
| 772 |
]
|
| 773 |
}
|
| 774 |
],
|
| 775 |
"source": [
|
| 776 |
+
"# Diagnostic version with timeouts and better error handling\n",
|
| 777 |
+
"import asyncio\n",
|
| 778 |
+
"from concurrent.futures import TimeoutError\n",
|
| 779 |
+
"\n",
|
| 780 |
+
"async def process_single_example_with_timeout(example, timeout_seconds=60):\n",
|
| 781 |
+
" \"\"\"Process a single example with timeout to diagnose hanging issues\"\"\"\n",
|
|
|
|
|
|
|
| 782 |
" \n",
|
| 783 |
+
" print(f\"\\n=== DIAGNOSTIC: Processing Task {example['task_id']} ===\")\n",
|
| 784 |
+
" print(f\"Question: {example['question'][:100]}...\")\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 785 |
" \n",
|
| 786 |
+
" try:\n",
|
| 787 |
+
" base_prompt = f\"{GAIA_PROMPT}\\nQuestion: {example['question']}\"\n",
|
| 788 |
+
" \n",
|
| 789 |
+
" # Create message based on file type\n",
|
| 790 |
+
" if example[\"file_name\"]:\n",
|
| 791 |
+
" file_path = f\"/home/laiking/code/learning/agents-course-pj/data/{example['file_name']}\"\n",
|
| 792 |
+
" print(f\"📁 File: {example['file_name']}\")\n",
|
| 793 |
+
" \n",
|
| 794 |
+
" if example[\"file_name\"].endswith(\".mp3\"):\n",
|
| 795 |
+
" message = ChatMessage(\n",
|
| 796 |
+
" role=\"user\",\n",
|
| 797 |
+
" content=[\n",
|
| 798 |
+
" TextBlock(text=base_prompt),\n",
|
| 799 |
+
" AudioBlock(path=file_path)\n",
|
| 800 |
+
" ]\n",
|
| 801 |
+
" )\n",
|
| 802 |
+
" elif example[\"file_name\"].endswith((\".png\", \".jpg\", \".jpeg\")):\n",
|
| 803 |
+
" message = ChatMessage(\n",
|
| 804 |
+
" role=\"user\",\n",
|
| 805 |
+
" content=[\n",
|
| 806 |
+
" TextBlock(text=base_prompt),\n",
|
| 807 |
+
" ImageBlock(path=file_path)\n",
|
| 808 |
+
" ]\n",
|
| 809 |
+
" )\n",
|
| 810 |
+
" elif example[\"file_name\"].endswith(\".py\"):\n",
|
| 811 |
+
" try:\n",
|
| 812 |
+
" with open(file_path, 'r') as f:\n",
|
| 813 |
+
" code_content = f.read()\n",
|
| 814 |
+
" exec_result = execute_python_code(code_content)\n",
|
| 815 |
+
" message = ChatMessage(\n",
|
| 816 |
+
" role=\"user\",\n",
|
| 817 |
+
" content=f\"{base_prompt}\\n\\nCode execution result: ```\\n{exec_result}\\n```\"\n",
|
| 818 |
+
" )\n",
|
| 819 |
+
" except Exception as e:\n",
|
| 820 |
+
" message = ChatMessage(\n",
|
| 821 |
+
" role=\"user\",\n",
|
| 822 |
+
" content=f\"{base_prompt}\\nError executing code: {e}\"\n",
|
| 823 |
+
" )\n",
|
| 824 |
+
" elif example[\"file_name\"].endswith(\".xlsx\"):\n",
|
| 825 |
+
" try:\n",
|
| 826 |
+
" excel_content = read_excel_file(file_path)\n",
|
| 827 |
+
" message = ChatMessage(\n",
|
| 828 |
+
" role=\"user\",\n",
|
| 829 |
+
" content=f\"{base_prompt}\\n\\nExcel file content:\\n{excel_content}\"\n",
|
| 830 |
+
" )\n",
|
| 831 |
+
" except Exception as e:\n",
|
| 832 |
+
" message = ChatMessage(\n",
|
| 833 |
+
" role=\"user\",\n",
|
| 834 |
+
" content=f\"{base_prompt}\\nError reading Excel: {e}\"\n",
|
| 835 |
+
" )\n",
|
| 836 |
+
" else:\n",
|
| 837 |
+
" message = ChatMessage(\n",
|
| 838 |
+
" role=\"user\",\n",
|
| 839 |
+
" content=f\"{base_prompt}\\nNote: A file {example['file_name']} is associated with this question.\"\n",
|
| 840 |
+
" )\n",
|
| 841 |
+
" else:\n",
|
| 842 |
+
" message = ChatMessage(\n",
|
| 843 |
+
" role=\"user\",\n",
|
| 844 |
+
" content=base_prompt\n",
|
| 845 |
+
" )\n",
|
| 846 |
+
" \n",
|
| 847 |
+
" print(\"🔄 Starting workflow execution...\")\n",
|
| 848 |
+
" \n",
|
| 849 |
+
" # Use asyncio.wait_for to add timeout\n",
|
| 850 |
+
" result = await asyncio.wait_for(\n",
|
| 851 |
+
" multi_agent_workflow.run(user_msg=message),\n",
|
| 852 |
+
" timeout=timeout_seconds\n",
|
| 853 |
+
" )\n",
|
| 854 |
+
" \n",
|
| 855 |
+
" print(\"✅ Workflow completed successfully\")\n",
|
| 856 |
+
" \n",
|
| 857 |
+
" # Extract answer\n",
|
| 858 |
+
" result_text = result.response.blocks[-1].text if hasattr(result.response, 'blocks') else str(result)\n",
|
| 859 |
+
" \n",
|
| 860 |
+
" # Quick answer extraction\n",
|
| 861 |
+
" patterns = [\n",
|
| 862 |
+
" r'final\\s+answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
|
| 863 |
+
" r'answer\\s*:\\s*(.*?)(?:\\n|$)',\n",
|
| 864 |
+
" ]\n",
|
| 865 |
+
" \n",
|
| 866 |
+
" final_answer = \"No final answer found\"\n",
|
| 867 |
+
" for pattern in patterns:\n",
|
| 868 |
+
" match = re.search(pattern, result_text, re.IGNORECASE | re.DOTALL)\n",
|
| 869 |
+
" if match:\n",
|
| 870 |
+
" final_answer = match.group(1).strip().split('\\n')[0]\n",
|
| 871 |
+
" break\n",
|
| 872 |
+
" \n",
|
| 873 |
+
" print(f\"📤 Extracted answer: {final_answer}\")\n",
|
| 874 |
+
" return {\n",
|
| 875 |
+
" \"task_id\": example[\"task_id\"],\n",
|
| 876 |
+
" \"submitted_answer\": final_answer,\n",
|
| 877 |
+
" \"status\": \"success\"\n",
|
| 878 |
+
" }\n",
|
| 879 |
+
" \n",
|
| 880 |
+
" except asyncio.TimeoutError:\n",
|
| 881 |
+
" print(f\"⏰ TIMEOUT after {timeout_seconds} seconds\")\n",
|
| 882 |
+
" return {\n",
|
| 883 |
+
" \"task_id\": example[\"task_id\"],\n",
|
| 884 |
+
" \"submitted_answer\": \"Timeout error\",\n",
|
| 885 |
+
" \"status\": \"timeout\"\n",
|
| 886 |
+
" }\n",
|
| 887 |
+
" except Exception as e:\n",
|
| 888 |
+
" print(f\"❌ ERROR: {str(e)}\")\n",
|
| 889 |
+
" return {\n",
|
| 890 |
+
" \"task_id\": example[\"task_id\"],\n",
|
| 891 |
+
" \"submitted_answer\": \"Error occurred\",\n",
|
| 892 |
+
" \"status\": \"error\",\n",
|
| 893 |
+
" \"error\": str(e)\n",
|
| 894 |
+
" }\n",
|
| 895 |
+
"\n",
|
| 896 |
+
"# Test specifically the 5th example to diagnose the hanging issue\n",
|
| 897 |
+
"print(\"🔍 DIAGNOSTIC: Testing the 5th example that was hanging...\")\n",
|
| 898 |
+
"fifth_example = data[0] # NOTE: data[0] is the FIRST example (0-indexed); the 5th example would be data[4]\n",
|
| 899 |
+
"result = await process_single_example_with_timeout(fifth_example, timeout_seconds=30)"
|
| 900 |
]
|
| 901 |
}
|
| 902 |
],
|
pyproject.toml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "agents"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"chromadb>=1.0.8",
|
| 9 |
+
"datasets>=3.6.0",
|
| 10 |
+
"gradio[oauth]>=5.33.1",
|
| 11 |
+
"huggingface-hub>=0.29.2",
|
| 12 |
+
"ipykernel>=6.29.5",
|
| 13 |
+
"ipynbname>=2024.1.0.0",
|
| 14 |
+
"ipywidgets>=8.1.5",
|
| 15 |
+
"jupyter>=1.1.1",
|
| 16 |
+
"llama-index>=0.12.41",
|
| 17 |
+
"llama-index-embeddings-huggingface>=0.5.3",
|
| 18 |
+
"llama-index-llms-gemini>=0.5.0",
|
| 19 |
+
"llama-index-llms-google-genai>=0.2.1",
|
| 20 |
+
"llama-index-llms-huggingface-api>=0.4.2",
|
| 21 |
+
"llama-index-llms-mistralai>=0.6.0",
|
| 22 |
+
"llama-index-llms-openrouter>=0.3.2",
|
| 23 |
+
"llama-index-readers-web>=0.4.2",
|
| 24 |
+
"llama-index-readers-wikipedia>=0.3.0",
|
| 25 |
+
"llama-index-tools-google>=0.3.1",
|
| 26 |
+
"llama-index-tools-tavily-research>=0.3.0",
|
| 27 |
+
"llama-index-tools-wikipedia>=0.3.0",
|
| 28 |
+
"llama-index-utils-workflow>=0.3.2",
|
| 29 |
+
"llama-index-vector-stores-chroma>=0.4.1",
|
| 30 |
+
"mwclient>=0.11.0",
|
| 31 |
+
"openinference-instrumentation-smolagents>=0.1.11",
|
| 32 |
+
"opentelemetry-exporter-otlp>=1.32.1",
|
| 33 |
+
"opentelemetry-sdk>=1.32.1",
|
| 34 |
+
"pip>=25.0.1",
|
| 35 |
+
"requests>=2.32.3",
|
| 36 |
+
"smolagents>=1.10.0",
|
| 37 |
+
"youtube-transcript-api>=1.1.0",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
[dependency-groups]
|
| 41 |
+
dev = [
|
| 42 |
+
"ipykernel>=6.29.5",
|
| 43 |
+
]
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|