Spaces:
Sleeping
Sleeping
File size: 817 Bytes
ca6fbc3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from bs4 import BeautifulSoup
from smolagents import Tool
class WebpageParser(Tool):
    """Tool that breaks an HTML document into a flat list of its elements.

    The markup is parsed with BeautifulSoup's ``html.parser`` backend and
    every tag found in the document is serialized back to a string.
    """

    # Tool metadata: identifier, human-readable summary, input schema and
    # declared output type.
    name: str = "webpage_parser_tool"
    description: str = (
        "This tool parses elements from HTML to make them easily searchable."
    )
    inputs: dict[str, dict[str, str]] = {
        "html_string": {
            "type": "string",
            "description": "The HTML content as a string.",
        },
    }
    output_type: str = "array"

    def forward(self, html_string: str) -> list[str]:
        """Return every element of ``html_string`` rendered as a string.

        Args:
            html_string: Raw HTML markup to parse.

        Returns:
            One string per tag, in the order ``find_all()`` yields them.
            Nested tags appear both inside their parent's string and as
            separate entries of their own.
        """
        parsed = BeautifulSoup(html_string, "html.parser")
        # find_all() with no filter matches every tag in the parsed tree.
        return list(map(str, parsed.find_all()))
|