Paperbag committed on
Commit
6000e5d
·
1 Parent(s): 40e8192

feat: introduce tools for local Python script execution and document reading, and refine GAIA output formatting.

Browse files
Files changed (2) hide show
  1. agent.py +73 -9
  2. requirements.txt +1 -0
agent.py CHANGED
@@ -1,5 +1,7 @@
1
  import os
2
  import datetime
 
 
3
  from typing import TypedDict, List, Dict, Any, Optional, Union
4
  from langchain_core import tools
5
  from langgraph.graph import StateGraph, START, END
@@ -190,6 +192,53 @@ def read_url(url: str) -> str:
190
  except Exception as e:
191
  return f"Error reading URL: {e}"
192
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
 
194
  system_prompt = """
195
  You are a helpful assistant tasked with answering questions using a set of tools.
@@ -232,7 +281,7 @@ def restart_required(state: AgentState) -> AgentState:
232
  # return {"messages": messages + [response]}
233
 
234
  # Augment the LLM with tools
235
- tools = [web_search, wiki_search, analyze_image, analyze_video, read_url]
236
  tools_by_name = {tool.name: tool for tool in tools}
237
  model_with_tools = model.bind_tools(tools)
238
 
@@ -247,14 +296,15 @@ def answer_message(state: AgentState) -> AgentState:
247
 
248
  TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
249
 
250
- CRITICAL RULES FOR SEARCH:
251
- 1. When using tools like web_search or wiki_search, do not blindly search the entire question. Extract the core entities.
252
- 2. If the first search result doesn't contain the answer, THINK step-by-step, refine your search query (e.g., use synonyms, or search for broader concepts), and search again.
253
- 3. Cross-reference facts if they seem ambiguous.
 
 
254
 
255
  Do not include any thought process before answering the question, and only response exactly what was being asked of you.
256
  If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
257
- If a file is attached, use the appropriate tool (analyze_image or analyze_video) to answer the question based on the file content.
258
 
259
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
260
  If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
@@ -288,15 +338,29 @@ def answer_message(state: AgentState) -> AgentState:
288
  final_instruction = HumanMessage(
289
  content=(
290
  "Using the tool results above, provide the FINAL numeric/text answer now. "
291
- "Do not call any tools. Respond with only the answer."
292
  )
293
  )
294
  messages.append(final_instruction)
 
295
 
296
- final_response = model.invoke(messages)
297
- print(f"Final response: {final_response}")
 
 
 
 
 
 
 
 
 
 
 
 
298
 
299
  # Return messages including the final AIMessage so BasicAgent reads .content
 
300
  messages.append(final_response)
301
  return {"messages": messages}
302
 
 
1
  import os
2
  import datetime
3
+ import subprocess
4
+ import tempfile
5
  from typing import TypedDict, List, Dict, Any, Optional, Union
6
  from langchain_core import tools
7
  from langgraph.graph import StateGraph, START, END
 
192
  except Exception as e:
193
  return f"Error reading URL: {e}"
194
 
195
# Tool: run caller-supplied Python source in a child interpreter and return
# what it printed.
#
# Parameters:
#   code: full source text of the script to execute.
# Returns (always a string, never raises for script/launch failures):
#   - captured stdout, followed by "\nErrors:\n<stderr>" when stderr is
#     non-empty, truncated to 15000 characters;
#   - a fixed notice when the script printed nothing;
#   - a timeout notice when the script exceeds 30 seconds;
#   - "Failed to execute script: ..." for any other failure.
#
# SECURITY NOTE(review): `code` is executed verbatim with this process's
# privileges and no sandboxing. That is the purpose of the tool, but it must
# only be exposed in environments where that is acceptable.
@tool
def run_python_script(code: str) -> str:
    """
    Executes a Python script locally and returns the stdout and stderr.
    Use this to perform complex math, data analysis (e.g. pandas), or file processing.
    When given a file path, you can write python code to read and analyze it.
    """
    # The docstring above is intentionally left verbatim: `tool` is presumably
    # LangChain's decorator, which would forward it to the model as the tool
    # description -- TODO confirm against the file's imports.
    import sys  # local import: keeps this block independent of the import header

    timeout_seconds = 30
    max_output_chars = 15000
    temp_file_name = None

    try:
        # Write the script as UTF-8 explicitly: the interpreter decodes source
        # files as UTF-8, while the default text encoding depends on the locale.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            temp_file_name = f.name
            f.write(code)

        # The file is closed before launching so the child can open it on
        # every platform. sys.executable runs the script with the same
        # interpreter (and installed packages) as this process instead of
        # whatever "python" happens to resolve to on PATH.
        result = subprocess.run(
            [sys.executable or "python", temp_file_name],
            capture_output=True,
            text=True,
            timeout=timeout_seconds,
        )

        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"

        return (output or "Script executed successfully with no output.")[:max_output_chars]
    except subprocess.TimeoutExpired:
        return f"Script execution timed out after {timeout_seconds} seconds."
    except Exception as e:
        # Tool boundary: report the failure to the caller as text.
        return f"Failed to execute script: {str(e)}"
    finally:
        # Single cleanup path for success, timeout and failure alike.
        if temp_file_name is not None:
            try:
                os.remove(temp_file_name)
            except OSError:
                # Best-effort: a leftover temp file must not mask the result.
                pass
227
+
228
# Tool: return the leading text of a local UTF-8 document.
#
# Parameters:
#   file_path: path of the file to open in text mode.
# Returns (always a string):
#   - the file's text when it is at most 15000 characters;
#   - the first 15000 characters followed by "... (truncated)" otherwise;
#   - "Error reading document: ..." when the file cannot be opened or decoded.
@tool
def read_document(file_path: str) -> str:
    """
    Reads the text contents of a local document (.txt, .csv, .json, .md).
    For binary files like .xlsx or .pdf, use run_python_script to process them instead.
    """
    # The docstring above is intentionally left verbatim: `tool` is presumably
    # LangChain's decorator, which would forward it to the model as the tool
    # description -- TODO confirm against the file's imports.
    max_chars = 15000

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read one character past the limit: enough to tell whether the
            # document is longer than what is returned, without pulling an
            # arbitrarily large file into memory.
            content = f.read(max_chars + 1)
    except Exception as e:
        return f"Error reading document: {str(e)}. Tip: You can try running a python script to read it!"

    if len(content) > max_chars:
        return content[:max_chars] + "... (truncated)"
    return content
242
 
243
  system_prompt = """
244
  You are a helpful assistant tasked with answering questions using a set of tools.
 
281
  # return {"messages": messages + [response]}
282
 
283
  # Augment the LLM with tools
284
+ tools = [web_search, wiki_search, analyze_image, analyze_video, read_url, run_python_script, read_document]
285
  tools_by_name = {tool.name: tool for tool in tools}
286
  model_with_tools = model.bind_tools(tools)
287
 
 
296
 
297
  TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
298
 
299
+ CRITICAL RULES FOR SEARCH & TOOLS:
300
+ 1. If a file is attached, use the appropriate tool (run_python_script, read_document, analyze_image, analyze_video) to answer the question based on the file content.
301
+ 2. Use run_python_script freely to process data (pandas), read complex documents (.xlsx, .pdf), or do heavy math calculations.
302
+ 3. When using tools like web_search or wiki_search, do not blindly search the entire question. Extract the core entities.
303
+ 4. If the first search result doesn't contain the answer, THINK step-by-step, refine your search query (e.g., use synonyms, or search for broader concepts), and search again.
304
+ 5. Cross-reference facts if they seem ambiguous.
305
 
306
  Do not include any thought process before answering the question, and only response exactly what was being asked of you.
307
  If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
 
308
 
309
  YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
310
  If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
 
338
  final_instruction = HumanMessage(
339
  content=(
340
  "Using the tool results above, provide the FINAL numeric/text answer now. "
341
+ "Do not call any tools. Provide exactly what was asked."
342
  )
343
  )
344
  messages.append(final_instruction)
345
+ draft_response = model.invoke(messages)
346
 
347
+ # Third pass: strict GAIA formatting extraction
348
+ formatting_sys = SystemMessage(
349
+ content=(
350
+ "You are a strict output formatter for the GAIA benchmark. "
351
+ "Given a verbose draft answer, extract ONLY the final exact answer required. "
352
+ "Return nothing else. DO NOT include prefixes like 'The answer is'. "
353
+ "Strip all punctuation points at the end and quotes. "
354
+ "If the answer is a number, just return the number without commas or units unless specified. "
355
+ "If it is a name or word, just return the exact string. If a list, return only the comma-separated list."
356
+ )
357
+ )
358
+ final_response = model.invoke([formatting_sys, HumanMessage(content=draft_response.content)])
359
+ print(f"Draft response: {draft_response.content}")
360
+ print(f"Strict Final response: {final_response.content}")
361
 
362
  # Return messages including the final AIMessage so BasicAgent reads .content
363
+ messages.append(draft_response)
364
  messages.append(final_response)
365
  return {"messages": messages}
366
 
requirements.txt CHANGED
@@ -23,3 +23,4 @@ groq
23
  unstructured[all-docs]
24
  opencv-python
25
  beautifulsoup4
 
 
23
  unstructured[all-docs]
24
  opencv-python
25
  beautifulsoup4
26
+ PyPDF2