Spaces:
Sleeping
Sleeping
| from smolagents import CodeAgent, HfApiModel, tool | |
| import os | |
| import requests | |
| from bs4 import BeautifulSoup | |
| from markdownify import markdownify | |
| from tools.final_answer import FinalAnswerTool | |
| from Gradio_UI import GradioUI | |
def webpage_scraper(url: str) -> str:
    """A tool that scrapes a webpage and returns its title and an excerpt of its main text.

    Args:
        url: URL of the webpage to scrape and summarize

    Returns:
        str: The page title followed by the main content converted to Markdown
            and cut to at most about 2000 characters, or a message starting
            with "Error scraping webpage:" when fetching or parsing fails.
    """
    max_chars = 2000  # upper bound on the excerpt handed back to the caller
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # `.string` is None for an empty <title> or one with nested tags, which
        # used to surface as "Title: None"; get_text() covers both cases.
        title = soup.title.get_text(strip=True) if soup.title else ""
        if not title:
            title = "No title"

        # Pick the most specific container available, falling back to <body>.
        main_content = (
            soup.find('main') or
            soup.find('article') or
            soup.find('div', class_='content') or
            soup.body
        )

        if main_content is None:
            content = "Could not find main content"
        else:
            # Drop page chrome and non-text elements before conversion.
            for tag in main_content.find_all(['script', 'style', 'nav', 'footer', 'aside', 'header']):
                tag.decompose()

            content = markdownify(str(main_content), heading_style="ATX").strip()

            if not content:
                content = "Could not find main content"
            elif len(content) > max_chars:
                # Only truncate when the text is actually too long, and prefer
                # to cut at the last sentence boundary inside the limit.
                clipped = content[:max_chars]
                head, _, _ = clipped.rpartition('.')
                content = (head or clipped) + '...'

        return f"Title: {title}\n\nContent Summary:\n{content}"
    except Exception as e:
        # Deliberately broad: the result is consumed as plain text, so any
        # failure (network, HTTP status, parsing) is reported as a string
        # instead of propagating to the caller.
        return f"Error scraping webpage: {e}"
# Tool the agent calls to hand back its final result.
final_answer = FinalAnswerTool()

# Keep the same model endpoint
model = HfApiModel(
    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
    max_tokens=2096,
    temperature=0.5,
    custom_role_conversions=None,
)

agent = CodeAgent(
    model=model,
    tools=[
        final_answer,
        # webpage_scraper is defined as a plain function; smolagents looks
        # tools up by their `.name` attribute, so it has to be wrapped into a
        # Tool with the imported `tool` helper before being registered.
        tool(webpage_scraper),
    ],
    max_steps=5,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    # NOTE(review): some smolagents releases require `name` to be a valid
    # Python identifier -- confirm against the installed version.
    name="Web Scraping Agent",
    description="An agent capable of scraping and analyzing web content",
    prompt_templates=None  # Remove prompts.yaml dependency
)

# Start the Gradio front end for the agent.
GradioUI(agent).launch()