Spaces:
Sleeping
Sleeping
Edit web_browser_tool to use BeautifulSoup and limit response length
Browse files
app.py
CHANGED
|
@@ -19,16 +19,32 @@ def my_custom_tool(arg1:str, arg2:int)-> str: #it's import to specify the return
|
|
| 19 |
return "What magic will you build ?"
|
| 20 |
|
| 21 |
@tool
|
| 22 |
-
def web_browser_tool(url: str) -> str:
|
| 23 |
-
"""A tool that fetches and returns
|
| 24 |
|
| 25 |
Args:
|
| 26 |
url: The URL of the webpage to fetch
|
|
|
|
| 27 |
"""
|
| 28 |
try:
|
| 29 |
response = requests.get(url)
|
| 30 |
response.raise_for_status()
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
except Exception as e:
|
| 33 |
return f"Error fetching webpage: {str(e)}"
|
| 34 |
|
|
|
|
| 19 |
return "What magic will you build ?"
|
| 20 |
|
| 21 |
@tool
def web_browser_tool(url: str, max_length: int = 1000) -> str:
    """A tool that fetches a webpage and returns its visible text, truncated to a maximum length.

    Args:
        url: The URL of the webpage to fetch
        max_length: Maximum length of returned content (default 1000 chars); must not be negative

    Returns:
        The page text with script, style, nav, header, footer and aside elements removed.
        When the text is longer than max_length it is cut to max_length characters and
        "..." is appended. On failure, a message starting with "Error" is returned instead.
    """
    # A negative limit would make the slice below count from the END of the
    # text, returning almost the whole page instead of limiting it.
    if max_length < 0:
        return f"Error: max_length must be non-negative, got {max_length}"

    try:
        # Without a timeout, requests waits indefinitely on an unresponsive host.
        response = requests.get(url, timeout=10)
        response.raise_for_status()

        # Parse HTML
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove scripts, styles, and other non-content elements
        for element in soup(['script', 'style', 'nav', 'header', 'footer', 'aside']):
            element.decompose()

        # Get main text content
        text = soup.get_text(separator=' ', strip=True)

        # Truncate and add ellipsis if needed (the ellipsis is added on top of max_length)
        if len(text) > max_length:
            text = text[:max_length] + "..."

        return text
    except Exception as e:
        # Any failure (network, HTTP status, parsing) is reported as text rather than raised.
        return f"Error fetching webpage: {str(e)}"
|
| 50 |
|