final_assignment_hfAgentsCourse

Sleeping

App Files Files Community

datdevsteve committed on Dec 14, 2025

Commit

8eb1cc8

verified ·

1 Parent(s): 4d5faa9

fixes for gaia submission

Browse files

Files changed (1) hide show

gaia_agent.py +119 -98

gaia_agent.py CHANGED Viewed

@@ -1,133 +1,123 @@
 import os
 import requests
-from langchain.agents import create_agent
 from langchain.tools import tool
 from dotenv import load_dotenv
 from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
-from ddgs import DDGS
 from bs4 import BeautifulSoup
 # Load environment variables
-#load_dotenv()
 # --- Agent Setup ---
-openai_key = os.getenv("OPENAI_API_KEY")
-googleai_key = os.getenv("GOOGLE_API_KEY")
-# Use OpenRouter via LangChain's ChatOpenAI
 openrouter_key = os.getenv("OPENROUTER_API_KEY")
 if not openrouter_key:
     raise RuntimeError("Set OPENROUTER_API_KEY in your .env (OpenRouter API key)")
-# Defer ChatOpenAI import until runtime to avoid import-time errors in environments without the package
 from langchain_openai import ChatOpenAI
 model = ChatOpenAI(
-  api_key=openrouter_key,
-  base_url="https://openrouter.ai/api/v1",
-  model="gpt-4o-mini",
-  max_completion_tokens=10000,
 )
 # --- Tools Definition ---
 @tool
-def multiply(a: int, b: int) -> int:
     """Multiply two numbers.
     Args:
-        a: first int
-        b: second int
     """
     return a * b
 @tool
-def add(a: int, b: int) -> int:
     """Add two numbers.
     Args:
-        a: first int
-        b: second int
     """
     return a + b
 @tool
-def subtract(a: int, b: int) -> int:
     """Subtract two numbers.
     Args:
-        a: first int
-        b: second int
     """
     return a - b
 @tool
-def divide(a: int, b: int) -> int:
     """Divide two numbers.
     Args:
-        a: first int
-        b: second int
     """
     if b == 0:
         raise ValueError("Cannot divide by zero.")
     return a / b
 @tool
-def modulus(a: int, b: int) -> int:
     """Get the modulus of two numbers.
     Args:
-        a: first int
-        b: second int
     """
     return a % b
 @tool
 def wiki_search(query: str) -> str:
     """Search Wikipedia for a query and return maximum 2 results."""
-    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
-            for doc in search_docs
-        ]
-    )
-    return formatted_search_docs
 @tool
 def web_search(query: str) -> str:
-    """Search DDGS for a query and return maximum 3 results."""
-    search_docs = DDGS().text(query, max_results=3)
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'Title:{doc["title"]}\nContent:{doc["body"]}\n--\n'
-            for doc in search_docs
-        ]
-    )
-    return formatted_search_docs
 @tool
 def arxiv_search(query: str) -> str:
     """Search arXiv for a query and return maximum 3 results."""
-    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
-    formatted_search_docs = "\n\n---\n\n".join(
-        [
-            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
-            for doc in search_docs
-        ]
-    )
-    return formatted_search_docs
-@tool
-def image_search(query: str) -> str:
-    """Searches DDGS for an image query and returns maximum 10 image results"""
-    search_images = DDGS().images(query=query)
-    formatted_result = "\n\n---\n\n".join(
-        [
-            f'Image Title:{image["title"]}\nImage URL: {image["url"]}'
-            for image in search_images
-        ]
-    )
-    return formatted_result
 @tool
 def fetch_url_content(url: str) -> str:
@@ -139,52 +129,83 @@ def fetch_url_content(url: str) -> str:
         for script in soup(["script", "style"]):
             script.decompose()
         text = soup.get_text(separator='\n', strip=True)
-        return text[:2000] + ("..." if len(text) > 2000 else "")
     except Exception as e:
         return f"Error fetching URL: {str(e)}"
 # Tools list
 tools = [
     multiply, add, subtract, divide, modulus,
-    wiki_search, web_search, arxiv_search, image_search,
     fetch_url_content,
 ]
-# System prompt
-sys_prompt = """You are a helpful agent, please provide clear and concise answers to asked questions.
-Keep your word limit for answers as minimum as you can. You are equipped with the following tools:
-1. [multiply], [add], [subtract], [divide], [modulus] - basic calculator operations.
-2. [wiki_search] - search Wikipedia and return up to 2 documents as text.
-3. [web_search] - perform a web search and return up to 3 documents as text.
-4. [arxiv_search] - search arXiv and return up to 3 documents as text.
-5. [image_search] - Searches the internet for an image query and returns maximum 10 image results
-Under any circumstances, if you fail to provide the accurate answer expected by the user, you may say the same to the user and provide a similar answer which is approximately the closest. Disregard spelling mistakes and provide answer with results retreived from the correct spelling.
-For every tool you use, append a single line at the end of your response exactly in this format:
-[TOOLS USED: (tool_name)]
-When no tools are used, append:
-[TOOLS USED WERE NONE]
-"""
 class GAIAAgent:
     def __init__(self):
-        # create internal agent
         try:
-            self.agent = create_agent(model, tools=tools, system_prompt=sys_prompt)
         except Exception as e:
             raise
     def __call__(self, question: str) -> str:
-        result = self.agent.invoke({"messages": [{"role": "user", "content": question}]})
-        raw_content = result["messages"][-1].content
-        if isinstance(raw_content, list) and len(raw_content) > 0:
-            if isinstance(raw_content[0], dict) and 'text' in raw_content[0]:
-                answer = raw_content[0]['text']
-            else:
-                answer = str(raw_content)
-        elif isinstance(raw_content, str):
-            answer = raw_content
-        else:
-            answer = str(raw_content)
-        return answer

 import os
 import requests
+from langchain.agents import create_react_agent, AgentExecutor
 from langchain.tools import tool
+from langchain_core.prompts import PromptTemplate
 from dotenv import load_dotenv
 from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
+from duckduckgo_search import DDGS
 from bs4 import BeautifulSoup
 # Load environment variables
+# load_dotenv()
 # --- Agent Setup ---
 # The OpenRouter key is read from the process environment. The load_dotenv()
 # call above is commented out, so despite the wording of the error message a
 # local .env file is NOT loaded by this module; the variable must already be
 # exported when the module is imported.
 openrouter_key = os.getenv("OPENROUTER_API_KEY")
 if not openrouter_key:
     # Fail at import time rather than on the first model call.
     raise RuntimeError("Set OPENROUTER_API_KEY in your .env (OpenRouter API key)")
 from langchain_openai import ChatOpenAI
 # Shared chat model used by the agent below. ChatOpenAI is pointed at the
 # OpenRouter endpoint through base_url instead of the OpenAI default.
 model = ChatOpenAI(
+    api_key=openrouter_key,
+    base_url="https://openrouter.ai/api/v1",
     # Model id carries a provider prefix ("openai/...").
     # NOTE(review): presumably the form OpenRouter expects - confirm.
+    model="openai/gpt-4o-mini",
+    max_tokens=10000,
     # NOTE(review): temperature 0 is presumably chosen for repeatable answers.
+    temperature=0
 )
 # --- Tools Definition ---
 @tool
 def multiply(a: float, b: float) -> float:
     """Multiply two numbers.
     Args:
         a: first number
         b: second number
     """
     # The docstring is kept verbatim: the @tool decorator presumably exposes
     # it to the model as the tool description, so its text is runtime data.
     product = a * b
     return product
 @tool
 def add(a: float, b: float) -> float:
     """Add two numbers.
     Args:
         a: first number
         b: second number
     """
     # The docstring is kept verbatim: the @tool decorator presumably exposes
     # it to the model as the tool description, so its text is runtime data.
     total = a + b
     return total
 @tool
 def subtract(a: float, b: float) -> float:
     """Subtract two numbers.
     Args:
         a: first number
         b: second number
     """
     # The docstring is kept verbatim: the @tool decorator presumably exposes
     # it to the model as the tool description, so its text is runtime data.
     difference = a - b
     return difference
 @tool
 def divide(a: float, b: float) -> float:
     """Divide two numbers.
     Args:
         a: first number
         b: second number
     """
     # The docstring is kept verbatim: the @tool decorator presumably exposes
     # it to the model as the tool description, so its text is runtime data.
     # Happy path first; a zero divisor falls through to the explicit error.
     if b != 0:
         quotient = a / b
         return quotient
     raise ValueError("Cannot divide by zero.")
 @tool
 def modulus(a: float, b: float) -> float:
     """Get the modulus of two numbers.
     Args:
         a: first number
         b: second number
     """
     # The docstring is kept verbatim: the @tool decorator presumably exposes
     # it to the model as the tool description, so its text is runtime data.
     # Raises ValueError when b is zero. Without this guard Python raises a
     # bare ZeroDivisionError; rejecting the input the same way divide() does
     # keeps the calculator tools consistent and gives a readable message.
     if b == 0:
         raise ValueError("Cannot take modulus by zero.")
     return a % b
 @tool
 def wiki_search(query: str) -> str:
     """Search Wikipedia for a query and return maximum 2 results."""
     # Returns the page texts joined by a "---" separator, a short notice when
     # nothing matched, or an "Error searching Wikipedia: ..." string. The tool
     # never raises, so a failed lookup becomes an observation the agent can
     # react to instead of aborting the run.
     try:
         pages = WikipediaLoader(query=query, load_max_docs=2).load()
         if not pages:
             # An empty string would hand the agent a blank observation that
             # looks the same as a tool malfunction; say so explicitly.
             return "No Wikipedia results found."
         return "\n\n---\n\n".join(f'\n{page.page_content}\n' for page in pages)
     except Exception as e:
         return f"Error searching Wikipedia: {str(e)}"
 @tool
 def web_search(query: str) -> str:
     """Search the web for a query and return maximum 3 results."""
     # Returns one Title/URL/Content entry per hit joined by a "---" separator,
     # a short notice when nothing matched, or an "Error searching web: ..."
     # string. The tool never raises.
     try:
         # Materialise the result so an empty or None response is detectable.
         hits = list(DDGS().text(query, max_results=3) or [])
         if not hits:
             return "No web results found."
         # The URL is included so the agent can pass it on to a page-fetching
         # tool. .get() keeps one hit with a missing field from discarding
         # every other hit through the except branch below.
         return "\n\n---\n\n".join(
             f'Title:{hit.get("title", "")}\nURL:{hit.get("href", "")}\nContent:{hit.get("body", "")}\n--\n'
             for hit in hits
         )
     except Exception as e:
         return f"Error searching web: {str(e)}"
 @tool
 def arxiv_search(query: str) -> str:
     """Search arXiv for a query and return maximum 3 results."""
     # Returns the first 1000 characters of each paper joined by a "---"
     # separator, a short notice when nothing matched, or an
     # "Error searching arXiv: ..." string. The tool never raises.
     try:
         papers = ArxivLoader(query=query, load_max_docs=3).load()
         if not papers:
             # An empty string would hand the agent a blank observation that
             # looks the same as a tool malfunction; say so explicitly.
             return "No arXiv results found."
         return "\n\n---\n\n".join(f'\n{paper.page_content[:1000]}\n' for paper in papers)
     except Exception as e:
         return f"Error searching arXiv: {str(e)}"
 @tool
 def fetch_url_content(url: str) -> str:
         for script in soup(["script", "style"]):
             script.decompose()
         text = soup.get_text(separator='\n', strip=True)
+        return text[:3000] + ("..." if len(text) > 3000 else "")
     except Exception as e:
         return f"Error fetching URL: {str(e)}"
 # Every tool handed to the agent: calculator helpers first, then lookups.
 tools = [
     multiply,
     add,
     subtract,
     divide,
     modulus,
     wiki_search,
     web_search,
     arxiv_search,
     fetch_url_content,
 ]
+# React prompt template
 # Template placeholders: {tools}, {tool_names}, {input} and {agent_scratchpad}.
 # NOTE(review): {tools}/{tool_names}/{agent_scratchpad} are presumably filled
 # in by the agent machinery and {input} by the caller - confirm.
 # The "Final Answer:" wording is part of the format the model is told to
 # follow, so treat the whole string as runtime data when editing it.
+react_prompt = PromptTemplate.from_template("""You are a helpful assistant that answers questions accurately and concisely.
+Answer the following questions as best you can. You have access to the following tools:
+{tools}
+Use the following format:
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can repeat N times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+IMPORTANT: Your Final Answer must be:
+- Short and direct (just the answer, no extra explanation)
+- A single value or short phrase
+- No formatting, no bullet points, no extra text
+- Just the factual answer
+Begin!
+Question: {input}
+Thought:{agent_scratchpad}""")
 class GAIAAgent:
     """Callable wrapper around a ReAct agent executor.
     Calling an instance with a question string runs the executor and returns
     the cleaned-up answer text. Failures while answering are reported as an
     "Error: ..." string instead of being raised.
     """
     # Lead-ins stripped from the front of an answer, stored lower-case
     # because matching is case-insensitive.
     _ANSWER_PREFIXES = (
         "the answer is:",
         "the final answer is:",
         "final answer:",
         "answer:",
     )
     def __init__(self):
         """Build the agent and its executor from the module-level model, tools and prompt.
         Any construction error is printed and then re-raised unchanged.
         """
         try:
             agent = create_react_agent(model, tools, react_prompt)
             self.agent_executor = AgentExecutor(
                 agent=agent,
                 tools=tools,
                 verbose=True,
                 handle_parsing_errors=True,
                 max_iterations=15
             )
         except Exception as e:
             print(f"Error creating agent: {e}")
             raise
     def __call__(self, question: str) -> str:
         """Answer *question* and return the cleaned answer text.
         Returns "Error: <message>" if the executor or the clean-up raises.
         """
         try:
             result = self.agent_executor.invoke({"input": question})
             return self._clean_answer(result.get("output", ""))
         except Exception as e:
             print(f"Error invoking agent: {e}")
             return f"Error: {str(e)}"
     @classmethod
     def _clean_answer(cls, raw) -> str:
         """Strip whitespace and any stacked answer lead-ins from *raw*."""
         # A missing output becomes an empty answer rather than an error text.
         answer = "" if raw is None else str(raw).strip()
         # Repeat until nothing matches: a single ordered pass would leave
         # "The answer is: 42" behind for "Final Answer: The answer is: 42".
         stripped = True
         while stripped:
             stripped = False
             for prefix in cls._ANSWER_PREFIXES:
                 # Compare the slice, not a lower-cased copy of the whole
                 # string, so the cut below always uses matching offsets.
                 if answer[:len(prefix)].lower() == prefix:
                     answer = answer[len(prefix):].strip()
                     stripped = True
                     break
         return answer