sajjadpsavoji committed on
Commit
8266ff7
·
1 Parent(s): 2c35d5c

add search and visit webpage functionality

Browse files
Files changed (4) hide show
  1. agent.py +21 -3
  2. requirements.txt +2 -0
  3. tools/visit_webpage.py +55 -0
  4. tools/web_search.py +58 -0
agent.py CHANGED
@@ -1,8 +1,13 @@
1
- from smolagents import CodeAgent, InferenceClientModel
2
- from tools.final_answer import FinalAnswerTool as FinalAnswer
3
  import os
4
  import yaml
5
 
 
 
 
 
 
 
 
6
  CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
7
 
8
  class Agent:
@@ -10,14 +15,26 @@ class Agent:
10
  self,
11
  default_answer: str = "Sorry, I don’t have an answer for that."
12
  ):
 
13
  model = InferenceClientModel(
14
  model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
15
  )
 
 
 
 
 
 
 
 
 
16
  with open(os.path.join(CURRENT_DIR, "prompts.yaml"), 'r') as stream:
17
  prompt_templates = yaml.safe_load(stream)
 
 
18
  self.agent = CodeAgent(
19
  model=model,
20
- tools=[],
21
  managed_agents=[],
22
  max_steps=3,
23
  verbosity_level=1,
@@ -30,6 +47,7 @@ class Agent:
30
  max_print_outputs_length=None,
31
  prompt_templates=prompt_templates
32
  )
 
33
  self.default_answer = default_answer
34
 
35
  def __call__(self, question: str) -> str:
 
 
 
1
  import os
2
  import yaml
3
 
4
+ from smolagents import CodeAgent, InferenceClientModel
5
+
6
+ from tools.final_answer import FinalAnswerTool as FinalAnswer
7
+ from tools.web_search import DuckDuckGoSearchTool as WebSearch
8
+ from tools.visit_webpage import VisitWebpageTool as VisitWebpage
9
+
10
+
11
  CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
12
 
13
  class Agent:
 
15
  self,
16
  default_answer: str = "Sorry, I don’t have an answer for that."
17
  ):
18
+ # select the LLM model to use
19
  model = InferenceClientModel(
20
  model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
21
  )
22
+
23
+ # select the tools to use
24
+ tools = [
25
+ FinalAnswer(),
26
+ WebSearch(),
27
+ VisitWebpage()
28
+ ]
29
+
30
+ # load the prompt templates from the prompts.yaml file
31
  with open(os.path.join(CURRENT_DIR, "prompts.yaml"), 'r') as stream:
32
  prompt_templates = yaml.safe_load(stream)
33
+
34
+ # create the agent with the selected model, tools, and prompt templates
35
  self.agent = CodeAgent(
36
  model=model,
37
+ tools=tools,
38
  managed_agents=[],
39
  max_steps=3,
40
  verbosity_level=1,
 
47
  max_print_outputs_length=None,
48
  prompt_templates=prompt_templates
49
  )
50
+ # set the default answer to return if the agent fails to answer
51
  self.default_answer = default_answer
52
 
53
  def __call__(self, question: str) -> str:
requirements.txt CHANGED
@@ -1,3 +1,5 @@
 
 
1
  gradio
2
  requests
3
  smolagents
 
1
+ duckduckgo_search
2
+ markdownify
3
  gradio
4
  requests
5
  smolagents
tools/visit_webpage.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+ import markdownify
4
+ import re
5
+ import requests
6
+
7
class VisitWebpageTool(Tool):
    """Tool that fetches a webpage and returns its content as a Markdown string.

    Args:
        max_output_length: Character budget handed to `_truncate_content`;
            longer pages keep only their beginning and end around a notice.
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
    output_type = "string"

    def __init__(self, max_output_length: int = 40000):
        super().__init__()
        self.max_output_length = max_output_length

    def _truncate_content(self, content: str, max_length: int) -> str:
        """Shorten `content` to roughly `max_length` characters.

        Content that already fits is returned unchanged. Otherwise the first
        and last parts of the text are kept and joined by a truncation notice
        (the notice itself is not counted against `max_length`).

        Args:
            content: Text to shorten.
            max_length: Number of characters of `content` to keep. Values
                of zero or below keep none of the original text.

        Returns:
            The original text, or head + notice + tail.
        """
        if len(content) <= max_length:
            return content

        # Explicit lengths instead of `content[-max_length // 2:]`: with
        # max_length == 0 that slice is `content[0:]` and returns everything.
        budget = max(max_length, 0)
        head_len = budget // 2
        tail_len = budget - head_len  # tail gets the extra char when odd
        return (
            content[:head_len]
            + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
            + content[len(content) - tail_len :]
        )

    def forward(self, url: str) -> str:
        """Download `url` and return the page converted to Markdown.

        Args:
            url: Address of the page to fetch.

        Returns:
            The (possibly truncated) Markdown text on success, or a
            human-readable error message if the request fails.

        Raises:
            ImportError: If `markdownify` or `requests` is not installed.
        """
        # Imported here so a missing dependency yields the actionable
        # installation hint below.
        try:
            import re

            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e

        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Collapse runs of three or more line breaks into one blank line
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return self._truncate_content(markdown_content, self.max_output_length)

        # Failures are returned as text rather than raised, so the caller
        # always receives a string. Timeout is matched before its parent
        # class RequestException.
        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"
tools/web_search.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+ import duckduckgo_search
4
+ import time
5
+
6
class DuckDuckGoSearchTool(Tool):
    """Web search tool that performs searches using the DuckDuckGo search engine.

    Args:
        max_results (`int`, default `10`): Maximum number of search results to return.
        rate_limit (`float`, default `1.0`): Maximum queries per second. Set to `None` to disable rate limiting.
        **kwargs: Additional keyword arguments for the `DDGS` client.

    Examples:
        ```python
        >>> from tools.web_search import DuckDuckGoSearchTool
        >>> web_search_tool = DuckDuckGoSearchTool(max_results=5, rate_limit=2.0)
        >>> results = web_search_tool("Hugging Face")
        >>> print(results)
        ```
    """

    name = "web_search"
    description = "Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."
    inputs = {'query': {'type': 'string', 'description': 'The search query to perform.'}}
    output_type = "string"

    def __init__(self, max_results: int = 10, rate_limit: float | None = 1.0, **kwargs):
        super().__init__()
        self.max_results = max_results
        self.rate_limit = rate_limit
        # Minimum spacing between two queries, in seconds (0 disables waiting).
        self._min_interval = 1.0 / rate_limit if rate_limit else 0.0
        # -inf makes the first query's elapsed time infinite, so it never
        # sleeps regardless of where the monotonic clock starts.
        self._last_request_time = float("-inf")
        try:
            from duckduckgo_search import DDGS
        except ImportError as e:
            raise ImportError(
                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
            ) from e
        self.ddgs = DDGS(**kwargs)

    def forward(self, query: str) -> str:
        """Run a text search for `query` and format the hits as Markdown.

        Args:
            query: The search query to perform.

        Returns:
            A "## Search Results" section with one `[title](href)` link and
            body snippet per result.

        Raises:
            Exception: If the search returns no results.
        """
        self._enforce_rate_limit()
        results = self.ddgs.text(query, max_results=self.max_results)
        if not results:
            raise Exception("No results found! Try a less restrictive/shorter query.")
        postprocessed_results = [
            f"[{result['title']}]({result['href']})\n{result['body']}" for result in results
        ]
        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)

    def _enforce_rate_limit(self) -> None:
        """Sleep just long enough to keep queries within `rate_limit` per second."""
        import time

        # No rate limit enforced
        if not self.rate_limit:
            return

        # A monotonic clock is used so that system clock adjustments cannot
        # produce a negative elapsed time and an arbitrarily long sleep.
        elapsed = time.monotonic() - self._last_request_time
        if elapsed < self._min_interval:
            time.sleep(self._min_interval - elapsed)
        self._last_request_time = time.monotonic()