feat: introduce tools for local Python script execution and document reading, and refine GAIA output formatting.
Browse files- agent.py +73 -9
- requirements.txt +1 -0
agent.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import os
|
| 2 |
import datetime
|
|
|
|
|
|
|
| 3 |
from typing import TypedDict, List, Dict, Any, Optional, Union
|
| 4 |
from langchain_core import tools
|
| 5 |
from langgraph.graph import StateGraph, START, END
|
|
@@ -190,6 +192,53 @@ def read_url(url: str) -> str:
|
|
| 190 |
except Exception as e:
|
| 191 |
return f"Error reading URL: {e}"
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
|
| 194 |
system_prompt = """
|
| 195 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
|
@@ -232,7 +281,7 @@ def restart_required(state: AgentState) -> AgentState:
|
|
| 232 |
# return {"messages": messages + [response]}
|
| 233 |
|
| 234 |
# Augment the LLM with tools
|
| 235 |
-
tools = [web_search, wiki_search, analyze_image, analyze_video, read_url]
|
| 236 |
tools_by_name = {tool.name: tool for tool in tools}
|
| 237 |
model_with_tools = model.bind_tools(tools)
|
| 238 |
|
|
@@ -247,14 +296,15 @@ def answer_message(state: AgentState) -> AgentState:
|
|
| 247 |
|
| 248 |
TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
|
| 249 |
|
| 250 |
-
CRITICAL RULES FOR SEARCH:
|
| 251 |
-
1.
|
| 252 |
-
2.
|
| 253 |
-
3.
|
|
|
|
|
|
|
| 254 |
|
| 255 |
Do not include any thought process before answering the question, and only response exactly what was being asked of you.
|
| 256 |
If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
|
| 257 |
-
If a file is attached, use the appropriate tool (analyze_image or analyze_video) to answer the question based on the file content.
|
| 258 |
|
| 259 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
|
| 260 |
If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
|
|
@@ -288,15 +338,29 @@ def answer_message(state: AgentState) -> AgentState:
|
|
| 288 |
final_instruction = HumanMessage(
|
| 289 |
content=(
|
| 290 |
"Using the tool results above, provide the FINAL numeric/text answer now. "
|
| 291 |
-
"Do not call any tools.
|
| 292 |
)
|
| 293 |
)
|
| 294 |
messages.append(final_instruction)
|
|
|
|
| 295 |
|
| 296 |
-
|
| 297 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 298 |
|
| 299 |
# Return messages including the final AIMessage so BasicAgent reads .content
|
|
|
|
| 300 |
messages.append(final_response)
|
| 301 |
return {"messages": messages}
|
| 302 |
|
|
|
|
| 1 |
import os
|
| 2 |
import datetime
|
| 3 |
+
import subprocess
|
| 4 |
+
import tempfile
|
| 5 |
from typing import TypedDict, List, Dict, Any, Optional, Union
|
| 6 |
from langchain_core import tools
|
| 7 |
from langgraph.graph import StateGraph, START, END
|
|
|
|
| 192 |
except Exception as e:
|
| 193 |
return f"Error reading URL: {e}"
|
| 194 |
|
| 195 |
+
@tool
|
| 196 |
+
def run_python_script(code: str) -> str:
    """
    Executes a Python script locally and returns the stdout and stderr.
    Use this to perform complex math, data analysis (e.g. pandas), or file processing.
    When given a file path, you can write python code to read and analyze it.

    The script runs in a separate process with a 30 second time limit, and the
    returned output is capped at 15000 characters.
    """
    import sys  # local import: only this function needs the interpreter path

    max_chars = 15000
    timeout_seconds = 30

    # NOTE(security): this runs arbitrary, model-generated code with the same
    # privileges as the current process. Only enable it in a sandboxed environment.
    temp_file_name = None
    try:
        # Python source files are decoded as UTF-8 by default, so write the
        # script as UTF-8 instead of relying on the platform locale encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            temp_file_name = f.name
            f.write(code)

        # Use the interpreter running this process so the script sees the same
        # installed packages; fall back to PATH lookup if it is unknown.
        result = subprocess.run(
            [sys.executable or "python", temp_file_name],
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
        )

        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"

        if not output:
            return "Script executed successfully with no output."
        if len(output) > max_chars:
            # Mark the cut so the caller knows the output is incomplete.
            return output[:max_chars] + "... (truncated)"
        return output
    except subprocess.TimeoutExpired:
        return "Script execution timed out after 30 seconds."
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Failed to execute script: {str(e)}"
    finally:
        # Single cleanup point for every exit path (success, timeout, error).
        if temp_file_name is not None:
            try:
                os.remove(temp_file_name)
            except OSError:
                pass
|
| 227 |
+
|
| 228 |
+
@tool
|
| 229 |
+
def read_document(file_path: str) -> str:
    """
    Reads the text contents of a local document (.txt, .csv, .json, .md).
    For binary files like .xlsx or .pdf, use run_python_script to process them instead.

    At most the first 15000 characters are returned; longer documents are cut
    off and end with "... (truncated)". Failures are reported as an error string.
    """
    max_chars = 15000
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read one character past the limit: enough to detect an oversized
            # document without loading the whole file into memory.
            content = f.read(max_chars + 1)
    except Exception as e:
        # Tool boundary: return the problem as text so the caller can react to it.
        return f"Error reading document: {str(e)}. Tip: You can try running a python script to read it!"

    if len(content) > max_chars:
        return content[:max_chars] + "... (truncated)"
    return content
|
| 242 |
|
| 243 |
system_prompt = """
|
| 244 |
You are a helpful assistant tasked with answering questions using a set of tools.
|
|
|
|
| 281 |
# return {"messages": messages + [response]}
|
| 282 |
|
| 283 |
# Augment the LLM with tools
|
| 284 |
+
tools = [web_search, wiki_search, analyze_image, analyze_video, read_url, run_python_script, read_document]
|
| 285 |
tools_by_name = {tool.name: tool for tool in tools}
|
| 286 |
model_with_tools = model.bind_tools(tools)
|
| 287 |
|
|
|
|
| 296 |
|
| 297 |
TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
|
| 298 |
|
| 299 |
+
CRITICAL RULES FOR SEARCH & TOOLS:
|
| 300 |
+
1. If a file is attached, use the appropriate tool (run_python_script, read_document, analyze_image, analyze_video) to answer the question based on the file content.
|
| 301 |
+
2. Use run_python_script freely to process data (pandas), read complex documents (.xlsx, .pdf), or do heavy math calculations.
|
| 302 |
+
3. When using tools like web_search or wiki_search, do not blindly search the entire question. Extract the core entities.
|
| 303 |
+
4. If the first search result doesn't contain the answer, THINK step-by-step, refine your search query (e.g., use synonyms, or search for broader concepts), and search again.
|
| 304 |
+
5. Cross-reference facts if they seem ambiguous.
|
| 305 |
|
| 306 |
Do not include any thought process before answering the question, and only response exactly what was being asked of you.
|
| 307 |
If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
|
|
|
|
| 308 |
|
| 309 |
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
|
| 310 |
If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
|
|
|
|
| 338 |
final_instruction = HumanMessage(
|
| 339 |
content=(
|
| 340 |
"Using the tool results above, provide the FINAL numeric/text answer now. "
|
| 341 |
+
"Do not call any tools. Provide exactly what was asked."
|
| 342 |
)
|
| 343 |
)
|
| 344 |
messages.append(final_instruction)
|
| 345 |
+
draft_response = model.invoke(messages)
|
| 346 |
|
| 347 |
+
# Third pass: strict GAIA formatting extraction
|
| 348 |
+
formatting_sys = SystemMessage(
|
| 349 |
+
content=(
|
| 350 |
+
"You are a strict output formatter for the GAIA benchmark. "
|
| 351 |
+
"Given a verbose draft answer, extract ONLY the final exact answer required. "
|
| 352 |
+
"Return nothing else. DO NOT include prefixes like 'The answer is'. "
|
| 353 |
+
"Strip all punctuation points at the end and quotes. "
|
| 354 |
+
"If the answer is a number, just return the number without commas or units unless specified. "
|
| 355 |
+
"If it is a name or word, just return the exact string. If a list, return only the comma-separated list."
|
| 356 |
+
)
|
| 357 |
+
)
|
| 358 |
+
final_response = model.invoke([formatting_sys, HumanMessage(content=draft_response.content)])
|
| 359 |
+
print(f"Draft response: {draft_response.content}")
|
| 360 |
+
print(f"Strict Final response: {final_response.content}")
|
| 361 |
|
| 362 |
# Return messages including the final AIMessage so BasicAgent reads .content
|
| 363 |
+
messages.append(draft_response)
|
| 364 |
messages.append(final_response)
|
| 365 |
return {"messages": messages}
|
| 366 |
|
requirements.txt
CHANGED
|
@@ -23,3 +23,4 @@ groq
|
|
| 23 |
unstructured[all-docs]
|
| 24 |
opencv-python
|
| 25 |
beautifulsoup4
|
|
|
|
|
|
| 23 |
unstructured[all-docs]
|
| 24 |
opencv-python
|
| 25 |
beautifulsoup4
|
| 26 |
+
PyPDF2
|