qwikQ8 committed on
Commit
bc246d0
·
verified ·
1 Parent(s): 22bb69a

Create visit_webpage.py

Browse files

https://huggingface.co/spaces/agents-course/First_agent_template/blob/main/tools/visit_webpage.py#L7

Files changed (1) hide show
  1. tools/visit_webpage.py +46 -0
tools/visit_webpage.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+ import requests
4
+ import markdownify
5
+ import smolagents
6
+
7
class VisitWebpageTool(Tool):
    """Tool that fetches a web page over HTTP and returns it as Markdown text."""

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
    output_type = "string"

    def forward(self, url: str) -> str:
        """Download ``url`` and return its content converted to Markdown.

        Args:
            url: Address of the page to fetch with an HTTP GET request.

        Returns:
            The page converted to Markdown, with runs of three or more
            newlines collapsed to a single blank line, passed through
            ``truncate_content(..., 10000)``. If the request or the
            conversion fails, a human-readable error message is returned
            instead of raising.

        Raises:
            ImportError: If ``requests``, ``markdownify`` or
                ``smolagents.utils`` cannot be imported.
        """
        # `re` was used below without ever being imported, so every successful
        # fetch ended in a swallowed NameError. Import it locally, in line with
        # the other function-scope imports of this method.
        import re

        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e
        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Remove multiple line breaks
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            # NOTE(review): presumably caps the output at 10000 characters --
            # confirm against smolagents.utils.truncate_content.
            return truncate_content(markdown_content, 10000)

        # Timeout is listed before the more general RequestException so that it
        # gets its own, more specific message.
        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            # Deliberate best-effort: failures are reported as text to the
            # caller rather than propagated.
            return f"An unexpected error occurred: {str(e)}"

    def __init__(self, *args, **kwargs):
        """Create the tool; positional and keyword arguments are accepted but ignored."""
        self.is_initialized = False