File size: 817 Bytes
ca6fbc3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from bs4 import BeautifulSoup
from smolagents import Tool


class WebpageParser(Tool):
    """Tool that splits an HTML document into its individual tags.

    The class-level attributes below are the tool metadata (name,
    description, input schema, and output type); ``forward`` carries out
    the actual parsing.
    """

    name: str = "webpage_parser_tool"
    description: str = "This tool parses elements from HTML to make them easily searchable."
    inputs: dict[str, dict[str, str]] = {
        "html_string": {
            "type": "string",
            "description": "The HTML content as a string.",
        },
    }
    output_type: str = "array"

    def forward(self, html_string: str) -> list[str]:
        """Return every tag found in ``html_string`` serialized back to markup.

        Args:
            html_string: Raw HTML to parse.

        Returns:
            One string per tag, in the order ``find_all()`` yields them.
            No filter is passed to ``find_all()``, so nested tags are
            listed as separate entries in addition to appearing inside
            the markup of their ancestors.
        """
        # Python's built-in parser backend keeps the tool free of extra
        # parser dependencies such as lxml or html5lib.
        document = BeautifulSoup(html_string, "html.parser")

        # str() on a tag renders the tag together with its contents.
        return list(map(str, document.find_all()))