smol_agent

Sleeping

smol_agent / app.py

warren else

Update app.py

976827d verified 12 months ago

2.41 kB

	from smolagents import CodeAgent, HfApiModel, tool
	import os
	import requests
	from bs4 import BeautifulSoup
	from markdownify import markdownify
	from tools.final_answer import FinalAnswerTool
	from Gradio_UI import GradioUI

	@tool
	def webpage_scraper(url: str) -> str:
	    """A tool that scrapes and summarizes webpage content from a given URL.

	    Args:
	        url: URL of the webpage to scrape and summarize

	    Returns:
	        str: A summary of the webpage content including title and main text
	    """
	    # Upper bound on how many markdown characters are handed back to the agent.
	    max_chars = 2000
	    try:
	        # Browser-like User-Agent sent with the request.
	        headers = {
	            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
	        }
	        response = requests.get(url, headers=headers, timeout=15)
	        response.raise_for_status()

	        # Parse the raw bytes instead of response.text: when the Content-Type
	        # header carries no charset, requests decodes text as ISO-8859-1, which
	        # garbles UTF-8 pages. Given bytes, BeautifulSoup can use the document's
	        # own <meta charset>. A charset that the server did declare is still
	        # passed along as the preferred encoding.
	        content_type = response.headers.get("Content-Type", "")
	        declared_encoding = response.encoding if "charset" in content_type.lower() else None
	        soup = BeautifulSoup(response.content, 'html.parser', from_encoding=declared_encoding)

	        # Get title. `.string` is None for an empty <title> or one with nested
	        # markup, which used to render as "Title: None"; get_text() always
	        # returns a str.
	        title = soup.title.get_text(strip=True) if soup.title else ""
	        if not title:
	            title = "No title"

	        # Find main content: first match wins, falling back to the whole <body>.
	        main_content = (
	            soup.find('main') or
	            soup.find('article') or
	            soup.find('div', class_='content') or
	            soup.body
	        )

	        if main_content:
	            # Remove unwanted elements
	            for tag in main_content.find_all(['script', 'style', 'nav', 'footer', 'aside', 'header']):
	                tag.decompose()

	            # Convert to markdown and clean up
	            content = markdownify(str(main_content), heading_style="ATX").strip()

	            # Only shorten text that actually exceeds the limit; previously a
	            # short page lost its final sentence and still got "..." appended.
	            if len(content) > max_chars:
	                clipped = content[:max_chars]
	                # Try to keep complete sentences: cut at the last full stop
	                # inside the window, or keep the whole window if there is none.
	                head, _, _ = clipped.rpartition('.')
	                content = (head or clipped) + '...'
	        else:
	            content = "Could not find main content"

	        return f"Title: {title}\n\nContent Summary:\n{content}"
	    except Exception as e:
	        # Deliberately broad: any failure (network, HTTP status, parsing) is
	        # returned as text rather than raised out of the tool.
	        return f"Error scraping webpage: {e}"

	# Tool instance passed to the agent alongside the scraper.
	final_answer = FinalAnswerTool()

	# Keep the same model endpoint
	_MODEL_ENDPOINT = 'https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
	model = HfApiModel(
	    model_id=_MODEL_ENDPOINT,
	    max_tokens=2096,
	    temperature=0.5,
	    custom_role_conversions=None,
	)

	# Tools exposed to the agent, in the same order as before.
	_AGENT_TOOLS = [final_answer, webpage_scraper]

	# NOTE(review): some smolagents releases appear to require `name` to be a
	# valid Python identifier - confirm against the version pinned for this app.
	agent = CodeAgent(
	    model=model,
	    tools=_AGENT_TOOLS,
	    max_steps=5,
	    verbosity_level=1,
	    grammar=None,
	    planning_interval=None,
	    name="Web Scraping Agent",
	    description="An agent capable of scraping and analyzing web content",
	    prompt_templates=None,  # Remove prompts.yaml dependency
	)

	# Build the Gradio front end around the agent and start serving it.
	ui = GradioUI(agent)
	ui.launch()