# agent1 / CompetitorTrackingAgent /tools /WebScrapingTool.py
# varun324242's picture
# Upload folder using huggingface_hub
# 06ba83e verified
from typing import ClassVar, Optional

import requests
from agency_swarm.tools import BaseTool
from bs4 import BeautifulSoup
from pydantic import Field
class WebScrapingTool(BaseTool):
    """
    A tool for performing web scraping tasks using BeautifulSoup.

    The tool downloads the page at ``url`` with ``requests``, parses it with
    BeautifulSoup's ``html.parser`` backend and collects the stripped text of
    every element named ``tag``. When both ``attribute`` and
    ``attribute_value`` are provided, only elements carrying that
    attribute/value pair are considered.
    """

    # Upper bound, in seconds, on how long the HTTP request may wait for the
    # server. Declared as a ClassVar so it is a plain class constant rather
    # than a tool field.
    REQUEST_TIMEOUT: ClassVar[float] = 10.0

    url: str = Field(
        ..., description="The URL of the web page to scrape."
    )
    tag: str = Field(
        ..., description="The HTML tag to search for in the web page."
    )
    # Both filter fields default to None, so they are typed Optional to let an
    # explicit None pass validation as well.
    attribute: Optional[str] = Field(
        None, description="The attribute of the HTML tag to filter by, if any."
    )
    attribute_value: Optional[str] = Field(
        None, description="The value of the attribute to filter by, if any."
    )

    def run(self):
        """
        Fetch the web page, parse its HTML and extract text from matching elements.

        Returns:
            A list with the whitespace-stripped text of every matching element
            (empty when nothing matches). If the HTTP request fails, or any
            other exception is raised while parsing or extracting, a
            human-readable error message string is returned instead; no
            exception propagates to the caller.
        """
        try:
            # Without a timeout, requests waits indefinitely for an
            # unresponsive server. Timeout errors derive from
            # requests.RequestException, so the handler below reports them.
            response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()  # Turn 4xx/5xx responses into exceptions

            # Parse the raw bytes so BeautifulSoup can detect the encoding itself
            soup = BeautifulSoup(response.content, 'html.parser')

            # The attribute filter applies only when both the attribute name
            # and its value are non-empty; otherwise every ``tag`` is matched.
            if self.attribute and self.attribute_value:
                elements = soup.find_all(self.tag, {self.attribute: self.attribute_value})
            else:
                elements = soup.find_all(self.tag)

            # Extract and return the text content of the found elements
            return [element.get_text(strip=True) for element in elements]
        except requests.RequestException as e:
            return f"An error occurred while fetching the web page: {e}"
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"An error occurred during parsing or extraction: {e}"
if __name__ == "__main__":
    # Manual smoke test: scrape <p class="content"> elements from example.com
    # and print whatever the tool returns (a list of texts or an error string).
    demo_arguments = {
        "url": "https://example.com",
        "tag": "p",
        "attribute": "class",
        "attribute_value": "content",
    }
    scraper = WebScrapingTool(**demo_arguments)
    outcome = scraper.run()
    print(outcome)