GAIA_v2

Sleeping

App Files Files Community

sajjadpsavoji committed on Jul 30, 2025

Commit

8266ff7

1 Parent(s): 2c35d5c

add search and visit webpage functionality

Browse files

Files changed (4) hide show

agent.py +21 -3
requirements.txt +2 -0
tools/visit_webpage.py +55 -0
tools/web_search.py +58 -0

agent.py CHANGED Viewed

@@ -1,8 +1,13 @@
-from smolagents import CodeAgent, InferenceClientModel
-from tools.final_answer import FinalAnswerTool as FinalAnswer
 import os
 import yaml
 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
 class Agent:
@@ -10,14 +15,26 @@ class Agent:
         self,
         default_answer: str = "Sorry, I don’t have an answer for that."
     ):
         model = InferenceClientModel(
             model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
         )
         with open(os.path.join(CURRENT_DIR, "prompts.yaml"), 'r') as stream:
             prompt_templates = yaml.safe_load(stream)
         self.agent = CodeAgent(
             model=model,
-            tools=[],
             managed_agents=[],
             max_steps=3,
             verbosity_level=1,
@@ -30,6 +47,7 @@ class Agent:
             max_print_outputs_length=None,
             prompt_templates=prompt_templates
         )
         self.default_answer = default_answer
     def __call__(self, question: str) -> str:

 import os
 import yaml
+from smolagents import CodeAgent, InferenceClientModel
+from tools.final_answer import FinalAnswerTool as FinalAnswer
+from tools.web_search import DuckDuckGoSearchTool as WebSearch
+from tools.visit_webpage import VisitWebpageTool as VisitWebpage
 CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
 class Agent:
         self,
         default_answer: str = "Sorry, I don’t have an answer for that."
     ):
+        # select the LLM model to use
         model = InferenceClientModel(
             model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
         )
+        # select the tools to use
+        tools = [
+            FinalAnswer(),
+            WebSearch(),
+            VisitWebpage()
+        ]
+        # load the prompt templates from the prompts.yaml file
         with open(os.path.join(CURRENT_DIR, "prompts.yaml"), 'r') as stream:
             prompt_templates = yaml.safe_load(stream)
+        # create the agent with the selected model, tools, and prompt templates
         self.agent = CodeAgent(
             model=model,
+            tools=tools,
             managed_agents=[],
             max_steps=3,
             verbosity_level=1,
             max_print_outputs_length=None,
             prompt_templates=prompt_templates
         )
+        # set the default answer to return if the agent fails to answer
         self.default_answer = default_answer
     def __call__(self, question: str) -> str:

requirements.txt CHANGED Viewed

@@ -1,3 +1,5 @@
 gradio
 requests
 smolagents

+duckduckgo_search
+markdownify
 gradio
 requests
 smolagents

tools/visit_webpage.py ADDED Viewed

	@@ -0,0 +1,55 @@

+from typing import Any, Optional
+from smolagents.tools import Tool
+import markdownify
+import re
+import requests
+class VisitWebpageTool(Tool):  # Tool that downloads a URL with `requests` and returns the page converted to Markdown text.
+    name = "visit_webpage"
+    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
+    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
+    output_type = "string"
+    def __init__(self, max_output_length: int = 40000):  # max_output_length: threshold that forward() passes to _truncate_content
+        super().__init__()
+        self.max_output_length = max_output_length
+    def _truncate_content(self, content: str, max_length: int) -> str:  # Return content as-is when it fits, otherwise head + notice + tail.
+        if len(content) <= max_length:
+            return content
+        return (
+            content[: max_length // 2]  # head: first max_length // 2 characters
+            + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"  # NOTE: the notice is added on top of head + tail, so the result can be longer than max_length
+            + content[-max_length // 2 :]  # tail: `-max_length // 2` is `(-max_length) // 2`, so an odd max_length gives a tail one character longer than the head
+        )
+    def forward(self, url: str) -> str:  # Fetch `url` and return its Markdown text; fetch failures are returned as message strings.
+        try:
+            import re  # local re-import; `re` is also imported at module top
+            import requests  # local re-import; `requests` is also imported at module top
+            from markdownify import markdownify  # binds the function locally, shadowing the module-level `markdownify` module inside this method
+            from requests.exceptions import RequestException
+        except ImportError as e:
+            raise ImportError(  # a missing dependency is raised to the caller, unlike fetch errors below which are returned as strings
+                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
+            ) from e
+        try:
+            # Send a GET request to the URL with a 20-second timeout
+            response = requests.get(url, timeout=20)
+            response.raise_for_status()  # Raise an exception for bad status codes
+            # Convert the HTML content to Markdown
+            markdown_content = markdownify(response.text).strip()
+            # Remove multiple line breaks
+            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)  # runs of 3+ newlines collapse to exactly two
+            return self._truncate_content(markdown_content, self.max_output_length)
+        except requests.exceptions.Timeout:  # checked before the more general RequestException handler below
+            return "The request timed out. Please try again later or check the URL."
+        except RequestException as e:
+            return f"Error fetching the webpage: {str(e)}"
+        except Exception as e:  # catch-all: any other failure is reported as text instead of propagating
+            return f"An unexpected error occurred: {str(e)}"

tools/web_search.py ADDED Viewed

	@@ -0,0 +1,58 @@

+from typing import Any, Optional
+from smolagents.tools import Tool
+import duckduckgo_search
+import time
+class DuckDuckGoSearchTool(Tool):
+    """Web search tool that performs searches using the DuckDuckGo search engine.
+    Args:
+        max_results (`int`, default `10`): Maximum number of search results to return.
+        rate_limit (`float`, default `1.0`): Maximum queries per second. Set to `None` to disable rate limiting.
+        **kwargs: Additional keyword arguments for the `DDGS` client.
+    Examples:
+        ```python
+        >>> from smolagents import DuckDuckGoSearchTool
+        >>> web_search_tool = DuckDuckGoSearchTool(max_results=5, rate_limit=2.0)
+        >>> results = web_search_tool("Hugging Face")
+        >>> print(results)
+        ```
+    """
+    name = "web_search"
+    description = "Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."
+    inputs = {'query': {'type': 'string', 'description': 'The search query to perform.'}}
+    output_type = "string"
+    def __init__(self, max_results: int = 10, rate_limit: float | None = 1.0, **kwargs):  # **kwargs are forwarded unchanged to the DDGS constructor
+        super().__init__()
+        self.max_results = max_results
+        self.rate_limit = rate_limit
+        self._min_interval = 1.0 / rate_limit if rate_limit else 0.0  # minimum seconds between requests; 0.0 when rate_limit is None or 0
+        self._last_request_time = 0.0  # time.time() stamp of the previous request; 0.0 until the first request is made
+        try:
+            from duckduckgo_search import DDGS  # imported here so a missing package produces the install hint below
+        except ImportError as e:
+            raise ImportError(
+                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
+            ) from e
+        self.ddgs = DDGS(**kwargs)  # one client instance, reused by every forward() call
+    def forward(self, query: str) -> str:  # Run the search and return the results as one Markdown string; raises Exception when nothing is found.
+        self._enforce_rate_limit()  # may sleep before the request is issued
+        results = self.ddgs.text(query, max_results=self.max_results)  # each result is indexed by 'title', 'href' and 'body' below
+        if len(results) == 0:
+            raise Exception("No results found! Try a less restrictive/shorter query.")
+        postprocessed_results = [f"[{result['title']}]({result['href']})\n{result['body']}" for result in results]  # one "[title](href)" link line followed by the body text per result
+        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
+    def _enforce_rate_limit(self) -> None:  # Sleep as needed so consecutive requests are at least _min_interval seconds apart.
+        import time  # local re-import; `time` is also imported at module top
+        # No rate limit enforced
+        if not self.rate_limit:
+            return
+        now = time.time()
+        elapsed = now - self._last_request_time
+        if elapsed < self._min_interval:
+            time.sleep(self._min_interval - elapsed)  # wait out only the remainder of the interval
+        self._last_request_time = time.time()  # stamped after any sleep, so the next interval is measured from this point