Spaces:
Sleeping
Sleeping
# NOTE(review): `tool` is imported but no @tool decorator is visible in this
# chunk — confirm the decorator exists on visit_webpage elsewhere, or drop it.
from smolagents import tool
import requests
from bs4 import BeautifulSoup

# Maximum number of characters of scraped page text returned to the caller.
MAX_WEBPAGE_SIZE = 3000
def visit_webpage(url: str) -> dict:
    """
    Visit a webpage and extract clean text from it.

    Args:
        url: The URL of the page to visit (e.g., https://en.wikipedia.org/wiki/OpenAI)

    Returns:
        dict: A dictionary containing:
            - "text": Page text truncated to MAX_WEBPAGE_SIZE characters
            - "url": The original URL
            - "status": HTTP status or error info
    """
    print(f"Tool: visit_webpage visiting {url}...")
    try:
        response = requests.get(url, timeout=10)
        # Surface HTTP errors (4xx/5xx) as exceptions so they take the error path.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        # Remove non-content elements so get_text() returns readable prose,
        # not embedded JavaScript/CSS source.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        text = soup.get_text(separator="\n", strip=True)
        short_text = text[:MAX_WEBPAGE_SIZE]
        print(f"Extracted {len(short_text)} characters from {url}")
        return {
            "text": short_text,
            "url": url,
            "status": f"Success ({response.status_code})",
        }
    except Exception as e:
        # Deliberately broad: this is a tool boundary, so any failure
        # (network, HTTP status, parsing) is reported back as an error
        # dict rather than raised to the agent.
        print(f"Error in visit_webpage: {e}")
        return {
            "text": "",
            "url": url,
            "status": f"Error: {e}",
        }