# Hugging Face Space application entry point.
# (The original "Spaces: Sleeping" lines here were page-scrape residue, not code.)
import os

import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    InferenceClientModel,
    Tool,
    load_tool,
    tool,
)

from Gradio_UI import GradioUI
class SiteContentFetcher(Tool):
    """smolagents tool that downloads a web page and returns its readable text.

    Network and HTTP failures are returned as human-readable strings instead
    of being raised, so the calling agent can read the error and react.
    """

    name = "site_content_fetcher"
    description = (
        "This tool fetches and cleans readable text from the specified URL. Normally used after some web_search_tool."
    )
    inputs = {
        "url": {
            "type": "string",
            "description": "The full URL of the website to fetch content from, including the protocol (http or https).",
        }
    }
    output_type = "string"

    # Cap on returned characters so huge pages don't flood the agent context.
    # Class-level constant; still reachable as self.MAX_CHARS.
    MAX_CHARS = 100_000

    def __init__(self):
        # BUG FIX: the original omitted this call. smolagents' Tool.__init__
        # performs attribute validation/registration; skipping it breaks
        # tool setup on current smolagents versions.
        super().__init__()

    def forward(self, url: str) -> str:
        """Fetch *url* and return its cleaned text, truncated to MAX_CHARS.

        Args:
            url: Full URL including the http:// or https:// scheme.

        Returns:
            Cleaned page text on success, otherwise a descriptive error string.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; SiteContentFetcher/1.0)"
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            cleaned_text = self._clean_html(response.text)
            return cleaned_text[:self.MAX_CHARS]
        except requests.exceptions.MissingSchema:
            return "Invalid URL format. Make sure it starts with http:// or https://"
        except requests.exceptions.Timeout:
            return "The request timed out. The site may be too slow or unresponsive."
        except requests.exceptions.ConnectionError:
            return f"Failed to connect to {url}. Please check if the site is reachable."
        except requests.exceptions.HTTPError as e:
            return f"HTTP error occurred: {e.response.status_code} {e.response.reason}"
        except Exception as e:
            # Last-resort guard so the agent always receives a string.
            return f"An unexpected error occurred: {str(e)}"

    def _clean_html(self, html: str) -> str:
        """Strip non-content tags from *html* and return the visible text.

        Script/style/noscript elements are removed, then the remaining text
        is split into lines, stripped, and blank lines are dropped.
        """
        soup = BeautifulSoup(html, "html.parser")
        # Remove script, style, and noscript tags
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        # Extract and clean text
        text = soup.get_text(separator="\n")
        lines = [line.strip() for line in text.splitlines()]
        cleaned_lines = [line for line in lines if line]
        return "\n".join(cleaned_lines)
# Template for adding a custom @tool-decorated function:
# @tool
# def my_custom_tool(arg1: str) -> str:
#     """ Description
#     Args:
#         arg1: the first argument
#     """
#     pass
# --- Model initialization -------------------------------------------------
# If the agent does not answer, the model is overloaded; use another model,
# or the following Hugging Face endpoint that also hosts qwen2.5 coder:
#   model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = InferenceClientModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,
    temperature=0.5,
    provider="auto",
    # token is read from the HF_TOKEN environment variable by default
)
# # Import tool from Hub
# image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# with open("prompts.yaml", 'r') as stream:
#     prompt_templates = yaml.safe_load(stream)

# FIX: FinalAnswerTool is imported at the top of the file and the inline
# comment below warns not to remove the final-answer tool, yet it was
# missing from the tools list — restore it per the course template.
agent = CodeAgent(
    model=model,
    tools=[FinalAnswerTool(), DuckDuckGoSearchTool(), SiteContentFetcher()],  ## add your tools here (don't remove final answer)
    max_steps=5,
    verbosity_level=1,
    # grammar=None,
    # planning_interval=None,
    # name=None,
    # description=None,
    # prompt_templates=prompt_templates
)
if __name__ == "__main__":
    # Launch the Gradio UI only when run as a script, so the module can be
    # imported (e.g. by tests or tooling) without starting a web server.
    GradioUI(agent).launch()