# smol_agent / app.py
# warren else
# Update app.py
# 976827d verified
from smolagents import CodeAgent, HfApiModel, tool
import os
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify
from tools.final_answer import FinalAnswerTool
from Gradio_UI import GradioUI
@tool
def webpage_scraper(url: str) -> str:
    """A tool that scrapes and summarizes webpage content from a given URL.

    The page is downloaded, its main content area is converted to Markdown,
    and the text is capped at about 2000 characters, cutting at a sentence
    boundary when the page is longer than that.

    Args:
        url: URL of the webpage to scrape and summarize

    Returns:
        str: A summary of the webpage content including title and main text
    """
    # Upper bound on the number of Markdown characters handed back to the agent.
    max_chars = 2000
    try:
        # Browser-like User-Agent sent with the request.
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Tag.string is None when <title> is empty or contains nested tags,
        # which used to produce "Title: None"; get_text() always returns a str.
        title = soup.title.get_text(strip=True) if soup.title else ""
        if not title:
            title = "No title"

        # Prefer the most specific content container, falling back to <body>.
        main_content = (
            soup.find('main') or
            soup.find('article') or
            soup.find('div', class_='content') or
            soup.body
        )
        if main_content is None:
            content = "Could not find main content"
        else:
            # Remove elements that are not part of the main text.
            for tag in main_content.find_all(['script', 'style', 'nav', 'footer', 'aside', 'header']):
                tag.decompose()
            content = markdownify(str(main_content), heading_style="ATX").strip()
            # Only shorten pages that exceed the limit; short pages are
            # returned whole instead of losing their last sentence.
            if len(content) > max_chars:
                clipped = content[:max_chars]
                # Cut back to the last full stop so the text ends on a
                # complete sentence; if that leaves nothing (no usable
                # period), keep the hard cut instead.
                head = clipped.rsplit('.', 1)[0]
                content = (head or clipped) + '...'
        return f"Title: {title}\n\nContent Summary:\n{content}"
    except Exception as e:
        # Tool boundary: report the failure to the agent as text rather than
        # raising out of the tool call.
        return f"Error scraping webpage: {e}"
# Instantiate the FinalAnswerTool imported from tools.final_answer.
final_answer = FinalAnswerTool()

# The model is addressed by a Hugging Face endpoint URL rather than a model name.
model = HfApiModel(
    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
    temperature=0.5,
    max_tokens=2096,
    custom_role_conversions=None,
)

# Code-writing agent equipped with the final-answer tool and the scraper tool.
agent = CodeAgent(
    name="Web Scraping Agent",
    description="An agent capable of scraping and analyzing web content",
    model=model,
    tools=[final_answer, webpage_scraper],
    max_steps=5,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    # No prompts.yaml is loaded here; None is passed explicitly
    # (presumably selecting the library's default templates — confirm).
    prompt_templates=None,
)

# Start the Gradio front end for the agent as soon as the module runs.
GradioUI(agent).launch()