sajjadpsavoji committed on
Commit ·
8266ff7
1
Parent(s): 2c35d5c
add search and visit webpage functionality
Browse files- agent.py +21 -3
- requirements.txt +2 -0
- tools/visit_webpage.py +55 -0
- tools/web_search.py +58 -0
agent.py
CHANGED
|
@@ -1,8 +1,13 @@
|
|
| 1 |
-
from smolagents import CodeAgent, InferenceClientModel
|
| 2 |
-
from tools.final_answer import FinalAnswerTool as FinalAnswer
|
| 3 |
import os
|
| 4 |
import yaml
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 7 |
|
| 8 |
class Agent:
|
|
@@ -10,14 +15,26 @@ class Agent:
|
|
| 10 |
self,
|
| 11 |
default_answer: str = "Sorry, I don’t have an answer for that."
|
| 12 |
):
|
|
|
|
| 13 |
model = InferenceClientModel(
|
| 14 |
model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
|
| 15 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
with open(os.path.join(CURRENT_DIR, "prompts.yaml"), 'r') as stream:
|
| 17 |
prompt_templates = yaml.safe_load(stream)
|
|
|
|
|
|
|
| 18 |
self.agent = CodeAgent(
|
| 19 |
model=model,
|
| 20 |
-
tools=
|
| 21 |
managed_agents=[],
|
| 22 |
max_steps=3,
|
| 23 |
verbosity_level=1,
|
|
@@ -30,6 +47,7 @@ class Agent:
|
|
| 30 |
max_print_outputs_length=None,
|
| 31 |
prompt_templates=prompt_templates
|
| 32 |
)
|
|
|
|
| 33 |
self.default_answer = default_answer
|
| 34 |
|
| 35 |
def __call__(self, question: str) -> str:
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import yaml
|
| 3 |
|
| 4 |
+
from smolagents import CodeAgent, InferenceClientModel
|
| 5 |
+
|
| 6 |
+
from tools.final_answer import FinalAnswerTool as FinalAnswer
|
| 7 |
+
from tools.web_search import DuckDuckGoSearchTool as WebSearch
|
| 8 |
+
from tools.visit_webpage import VisitWebpageTool as VisitWebpage
|
| 9 |
+
|
| 10 |
+
|
| 11 |
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 12 |
|
| 13 |
class Agent:
|
|
|
|
| 15 |
self,
|
| 16 |
default_answer: str = "Sorry, I don’t have an answer for that."
|
| 17 |
):
|
| 18 |
+
# select the LLM model to use
|
| 19 |
model = InferenceClientModel(
|
| 20 |
model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
|
| 21 |
)
|
| 22 |
+
|
| 23 |
+
# select the tools to use
|
| 24 |
+
tools = [
|
| 25 |
+
FinalAnswer(),
|
| 26 |
+
WebSearch(),
|
| 27 |
+
VisitWebpage()
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
# load the prompt templates from the prompts.yaml file
|
| 31 |
with open(os.path.join(CURRENT_DIR, "prompts.yaml"), 'r') as stream:
|
| 32 |
prompt_templates = yaml.safe_load(stream)
|
| 33 |
+
|
| 34 |
+
# create the agent with the selected model, tools, and prompt templates
|
| 35 |
self.agent = CodeAgent(
|
| 36 |
model=model,
|
| 37 |
+
tools=tools,
|
| 38 |
managed_agents=[],
|
| 39 |
max_steps=3,
|
| 40 |
verbosity_level=1,
|
|
|
|
| 47 |
max_print_outputs_length=None,
|
| 48 |
prompt_templates=prompt_templates
|
| 49 |
)
|
| 50 |
+
# set the default answer to return if the agent fails to answer
|
| 51 |
self.default_answer = default_answer
|
| 52 |
|
| 53 |
def __call__(self, question: str) -> str:
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
gradio
|
| 2 |
requests
|
| 3 |
smolagents
|
|
|
|
| 1 |
+
duckduckgo_search
|
| 2 |
+
markdownify
|
| 3 |
gradio
|
| 4 |
requests
|
| 5 |
smolagents
|
tools/visit_webpage.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
from smolagents.tools import Tool
|
| 3 |
+
import markdownify
|
| 4 |
+
import re
|
| 5 |
+
import requests
|
| 6 |
+
|
| 7 |
+
class VisitWebpageTool(Tool):
    """Tool that downloads a webpage and returns its content as Markdown.

    The page is fetched with an HTTP GET request, converted from HTML to
    Markdown with `markdownify`, stripped of runs of blank lines, and
    truncated in the middle when it is longer than `max_output_length`.

    Network and HTTP failures are not raised: `forward` reports them as a
    human-readable string, so the caller always receives text.

    Args:
        max_output_length (`int`, default `40000`): Maximum number of page
            characters kept in the output (the truncation notice itself is
            not counted).
    """

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
    output_type = "string"

    def __init__(self, max_output_length: int = 40000):
        super().__init__()
        self.max_output_length = max_output_length

    def _truncate_content(self, content: str, max_length: int) -> str:
        """Shorten `content` to at most `max_length` characters plus a notice.

        Content that already fits is returned unchanged. Otherwise the
        beginning and the end of the content are kept and a truncation
        notice is inserted between them.

        Args:
            content: Text to shorten.
            max_length: Number of characters of `content` to keep. Values
                of zero or less keep none of the content.

        Returns:
            The original text, or its head and tail joined by the notice.
        """
        if len(content) <= max_length:
            return content

        # Explicit, non-negative lengths: a negative-index slice such as
        # content[-0:] would return the *whole* content for max_length == 0.
        budget = max(max_length, 0)
        head_len = budget // 2
        # The tail gets the extra character when the budget is odd.
        tail_len = budget - head_len
        return (
            content[:head_len]
            + f"\n..._This content has been truncated to stay below {max_length} characters_...\n"
            + content[len(content) - tail_len :]
        )

    def forward(self, url: str) -> str:
        """Fetch `url` and return the page as Markdown text.

        Args:
            url: The url of the webpage to visit.

        Returns:
            The (possibly truncated) Markdown rendering of the page, or an
            error message string when the request fails.

        Raises:
            ImportError: If `markdownify` or `requests` is not installed.
        """
        # NOTE(review): imports are kept function-local, presumably so the
        # tool stays self-contained when exported/shared — confirm before
        # hoisting them to module level.
        try:
            import re

            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException, Timeout
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e
        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Collapse three or more consecutive line breaks into two
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return self._truncate_content(markdown_content, self.max_output_length)

        # Timeout is handled before the more general RequestException so
        # that it gets its own message.
        except Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            # Deliberate best-effort: any other failure is reported as text
            # rather than raised.
            return f"An unexpected error occurred: {str(e)}"
|
tools/web_search.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
from smolagents.tools import Tool
|
| 3 |
+
import duckduckgo_search
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
class DuckDuckGoSearchTool(Tool):
    """Web search tool that performs searches using the DuckDuckGo search engine.

    Args:
        max_results (`int`, default `10`): Maximum number of search results to return.
        rate_limit (`float`, default `1.0`): Maximum queries per second. Set to `None` to disable rate limiting.
        **kwargs: Additional keyword arguments for the `DDGS` client.

    Examples:
        ```python
        >>> from smolagents import DuckDuckGoSearchTool
        >>> web_search_tool = DuckDuckGoSearchTool(max_results=5, rate_limit=2.0)
        >>> results = web_search_tool("Hugging Face")
        >>> print(results)
        ```
    """

    name = "web_search"
    description = "Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."
    inputs = {'query': {'type': 'string', 'description': 'The search query to perform.'}}
    output_type = "string"

    def __init__(self, max_results: int = 10, rate_limit: float | None = 1.0, **kwargs):
        super().__init__()
        self.max_results = max_results
        self.rate_limit = rate_limit
        # Minimum number of seconds between two consecutive queries;
        # 0.0 when rate limiting is disabled (rate_limit is None or 0).
        self._min_interval = 1.0 / rate_limit if rate_limit else 0.0
        # Monotonic timestamp of the previous query. Starting at -inf means
        # the very first query is never delayed, whatever the clock's
        # (unspecified) reference point is.
        self._last_request_time = float("-inf")
        try:
            from duckduckgo_search import DDGS
        except ImportError as e:
            raise ImportError(
                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
            ) from e
        self.ddgs = DDGS(**kwargs)

    def forward(self, query: str) -> str:
        """Run a web search and return the results as Markdown text.

        Args:
            query: The search query to perform.

        Returns:
            A "## Search Results" section with one `[title](href)` link and
            body snippet per result.

        Raises:
            Exception: If the search returns no results.
        """
        self._enforce_rate_limit()
        results = self.ddgs.text(query, max_results=self.max_results)
        # Truthiness check also covers a client that returns None.
        if not results:
            raise Exception("No results found! Try a less restrictive/shorter query.")
        postprocessed_results = [f"[{result['title']}]({result['href']})\n{result['body']}" for result in results]
        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)

    def _enforce_rate_limit(self) -> None:
        """Sleep as needed so queries are at least `_min_interval` apart."""
        import time

        # No rate limit enforced
        if not self.rate_limit:
            return

        # Use the monotonic clock: wall-clock time can jump (NTP sync,
        # manual changes), which would make the elapsed time negative and
        # the sleep arbitrarily long.
        now = time.monotonic()
        elapsed = now - self._last_request_time
        if elapsed < self._min_interval:
            time.sleep(self._min_interval - elapsed)
        self._last_request_time = time.monotonic()
|