Spaces:
Sleeping
Sleeping
| # from crewai.tools import tool | |
| # from scrapegraphai.graphs import SmartScraperGraph | |
| # from schemas.extractor_schema import SinglePartOfContent | |
| # from modules import llm | |
| # @tool | |
| # def web_scraping_tool(page_url: str): | |
| # """ | |
| # An AI tool that lets an agent scrape a web page using ScrapeGraphAI. | |
| # """ | |
| # try: | |
| # graph = SmartScraperGraph( | |
| # prompt=( | |
| # "Extract ALL detailed content from the web page without summarizing. " | |
| # "Preserve full paragraphs, lists, and section headers. " | |
| # "Include associated media (images, videos, audios, pdfs). " | |
| # "Fit everything into this schema:\n" | |
| # "```json\n" + SinglePartOfContent.schema_json() + "\n```" | |
| # ), | |
| # source=page_url, | |
| # llm=llm, | |
| # ) | |
| # details = graph.run() | |
| # return { | |
| # "page_url": page_url, | |
| # "details": details, | |
| # } | |
| # except Exception as e: | |
| # raise RuntimeError(f"Scraping failed for {page_url}: {str(e)}") | |