Final_Assignment_Template

Running

App Files Files Community

Paperbag committed on Mar 23

Commit

6000e5d

1 Parent(s): 40e8192

feat: introduce tools for local Python script execution and document reading, and refine GAIA output formatting.

Browse files

Files changed (2) hide show

agent.py +73 -9
requirements.txt +1 -0

agent.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 import datetime
 from typing import TypedDict, List, Dict, Any, Optional, Union
 from langchain_core import tools
 from langgraph.graph import StateGraph, START, END
@@ -190,6 +192,53 @@ def read_url(url: str) -> str:
     except Exception as e:
         return f"Error reading URL: {e}"
 system_prompt = """
 You are a helpful assistant tasked with answering questions using a set of tools.
@@ -232,7 +281,7 @@ def restart_required(state: AgentState) -> AgentState:
 #     return {"messages": messages + [response]}
 # Augment the LLM with tools
-tools = [web_search, wiki_search, analyze_image, analyze_video, read_url]
 tools_by_name = {tool.name: tool for tool in tools}
 model_with_tools = model.bind_tools(tools)
@@ -247,14 +296,15 @@ def answer_message(state: AgentState) -> AgentState:
     TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
-    CRITICAL RULES FOR SEARCH:
-    1. When using tools like web_search or wiki_search, do not blindly search the entire question. Extract the core entities.
-    2. If the first search result doesn't contain the answer, THINK step-by-step, refine your search query (e.g., use synonyms, or search for broader concepts), and search again.
-    3. Cross-reference facts if they seem ambiguous.
     Do not include any thought process before answering the question, and only response exactly what was being asked of you.
     If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
-    If a file is attached, use the appropriate tool (analyze_image or analyze_video) to answer the question based on the file content.
     YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
     If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
@@ -288,15 +338,29 @@ def answer_message(state: AgentState) -> AgentState:
     final_instruction = HumanMessage(
         content=(
             "Using the tool results above, provide the FINAL numeric/text answer now. "
-            "Do not call any tools. Respond with only the answer."
         )
     )
     messages.append(final_instruction)
-    final_response = model.invoke(messages)
-    print(f"Final response: {final_response}")
     # Return messages including the final AIMessage so BasicAgent reads .content
     messages.append(final_response)
     return {"messages": messages}

 import os
 import datetime
+import subprocess
+import tempfile
 from typing import TypedDict, List, Dict, Any, Optional, Union
 from langchain_core import tools
 from langgraph.graph import StateGraph, START, END
     except Exception as e:
         return f"Error reading URL: {e}"
@tool
def run_python_script(code: str) -> str:
    """
    Executes a Python script locally and returns the stdout and stderr.
    Use this to perform complex math, data analysis (e.g. pandas), or file processing.
    When given a file path, you can write python code to read and analyze it.
    """
    # NOTE(review): the docstring above is presumably consumed by the `tool`
    # decorator as the model-facing tool description, so its wording is left
    # untouched -- confirm before editing it.
    #
    # SECURITY: `code` is model-generated and is executed unsandboxed with the
    # privileges of this process (file system, network, environment variables).
    # Only deploy this tool inside an isolated environment.
    #
    # Returns: stdout (plus an "Errors:" section when stderr is non-empty),
    # capped at 15000 characters, or a short failure message. Never raises.
    import sys  # local import: nothing else in this block's view needs it

    script_path = None
    try:
        # Write the script as UTF-8 explicitly: the interpreter decodes source
        # files as UTF-8, so relying on the locale encoding can corrupt
        # non-ASCII code on some platforms.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            # Record the path before writing so a failed write is still cleaned up.
            script_path = f.name
            f.write(code)

        result = subprocess.run(
            # Use the interpreter running this process so the script sees the
            # same installed packages; a bare "python" may be missing from PATH
            # or point at a different environment.
            [sys.executable or "python", script_path],
            capture_output=True,
            text=True,
            errors="replace",  # undecodable output bytes must not abort the tool
            timeout=30,
        )
        output = result.stdout
        if result.stderr:
            output += f"\nErrors:\n{result.stderr}"
        return (output or "Script executed successfully with no output.")[:15000]
    except subprocess.TimeoutExpired:
        return "Script execution timed out after 30 seconds."
    except Exception as e:
        return f"Failed to execute script: {str(e)}"
    finally:
        # Single cleanup point for every exit path (success, timeout, error).
        if script_path is not None:
            try:
                os.remove(script_path)
            except OSError:
                # Best-effort: a leftover temp file must not mask the result.
                pass
@tool
def read_document(file_path: str) -> str:
    """
    Reads the text contents of a local document (.txt, .csv, .json, .md).
    For binary files like .xlsx or .pdf, use run_python_script to process them instead.
    """
    # NOTE(review): the docstring above is presumably consumed by the `tool`
    # decorator as the model-facing tool description, so its wording is left
    # untouched -- confirm before editing it.
    #
    # Returns: the file's text, cut to `max_chars` characters with a
    # "... (truncated)" suffix when longer, or an error message. Never raises.
    max_chars = 15000
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # Read one character past the cap: enough to detect truncation
            # without loading an arbitrarily large file into memory.
            content = f.read(max_chars + 1)
    except Exception as e:
        return f"Error reading document: {str(e)}. Tip: You can try running a python script to read it!"

    if len(content) > max_chars:
        return content[:max_chars] + "... (truncated)"
    return content
 system_prompt = """
 You are a helpful assistant tasked with answering questions using a set of tools.
 #     return {"messages": messages + [response]}
 # Augment the LLM with tools
+tools = [web_search, wiki_search, analyze_image, analyze_video, read_url, run_python_script, read_document]
 tools_by_name = {tool.name: tool for tool in tools}
 model_with_tools = model.bind_tools(tools)
     TODAY'S EXACT DATE is {current_date}. Keep this in mind for all time-sensitive queries.
+    CRITICAL RULES FOR SEARCH & TOOLS:
+    1. If a file is attached, use the appropriate tool (run_python_script, read_document, analyze_image, analyze_video) to answer the question based on the file content.
+    2. Use run_python_script freely to process data (pandas), read complex documents (.xlsx, .pdf), or do heavy math calculations.
+    3. When using tools like web_search or wiki_search, do not blindly search the entire question. Extract the core entities.
+    4. If the first search result doesn't contain the answer, THINK step-by-step, refine your search query (e.g., use synonyms, or search for broader concepts), and search again.
+    5. Cross-reference facts if they seem ambiguous.
     Do not include any thought process before answering the question, and only response exactly what was being asked of you.
     If you are not able to provide an answer, use tools or state the limitation that you're facing instead.
     YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
     If you are asked for a number, don't use comma to write your number, and don't use units such as $ or percent sign unless specified otherwise.
     final_instruction = HumanMessage(
         content=(
             "Using the tool results above, provide the FINAL numeric/text answer now. "
+            "Do not call any tools. Provide exactly what was asked."
         )
     )
     messages.append(final_instruction)
+    draft_response = model.invoke(messages)
+    # Third pass: strict GAIA formatting extraction
+    formatting_sys = SystemMessage(
+        content=(
+            "You are a strict output formatter for the GAIA benchmark. "
+            "Given a verbose draft answer, extract ONLY the final exact answer required. "
+            "Return nothing else. DO NOT include prefixes like 'The answer is'. "
+            "Strip all punctuation points at the end and quotes. "
+            "If the answer is a number, just return the number without commas or units unless specified. "
+            "If it is a name or word, just return the exact string. If a list, return only the comma-separated list."
+        )
+    )
+    final_response = model.invoke([formatting_sys, HumanMessage(content=draft_response.content)])
+    print(f"Draft response: {draft_response.content}")
+    print(f"Strict Final response: {final_response.content}")
     # Return messages including the final AIMessage so BasicAgent reads .content
+    messages.append(draft_response)
     messages.append(final_response)
     return {"messages": messages}

requirements.txt CHANGED Viewed

@@ -23,3 +23,4 @@ groq
 unstructured[all-docs]
 opencv-python
 beautifulsoup4

 unstructured[all-docs]
 opencv-python
 beautifulsoup4
+PyPDF2