# Page-header residue from the file viewer, kept as a comment:
# author NikitaBaramiia; commit a1422d2 (verified), message "Update app.py".
import os
import yaml
import requests
from bs4 import BeautifulSoup
from smolagents import (
load_tool, tool, Tool,
DuckDuckGoSearchTool, FinalAnswerTool,
CodeAgent, InferenceClientModel,
)
from Gradio_UI import GradioUI
class SiteContentFetcher(Tool):
    """Fetch a web page and return its readable text.

    The page is downloaded with ``requests``, stripped of ``<script>``,
    ``<style>`` and ``<noscript>`` elements, and reduced to its non-empty,
    whitespace-trimmed lines. Failures are reported as plain-text messages
    instead of being raised, so the caller always receives a string.
    """

    name = "site_content_fetcher"
    description = (
        "This tool fetches and cleans readable text from the specified URL. Normally used after some web_search_tool."
    )
    inputs = {
        "url": {
            "type": "string",
            "description": "The full URL of the website to fetch content from, including the protocol (http or https).",
        }
    }
    output_type = "string"

    def __init__(self):
        # Run the base initializer first so whatever state it sets up exists
        # before the tool is called.
        super().__init__()
        self.MAX_CHARS = 100_000  # upper bound on the number of characters returned

    def forward(self, url: str) -> str:
        """Download ``url`` and return its cleaned text, or an error message.

        Args:
            url: Full URL including the ``http://`` or ``https://`` scheme.

        Returns:
            At most ``self.MAX_CHARS`` characters of cleaned page text. If the
            request fails, a human-readable description of the failure.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; SiteContentFetcher/1.0)"
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            # When the server declares no charset, requests falls back to
            # ISO-8859-1 for text/* bodies, which garbles UTF-8 pages. Use the
            # encoding detected from the body in that case.
            content_type = response.headers.get("Content-Type", "")
            if "charset" not in content_type.lower():
                response.encoding = response.apparent_encoding
            cleaned_text = self._clean_html(response.text)
            return cleaned_text[: self.MAX_CHARS]
        except requests.exceptions.MissingSchema:
            return "Invalid URL format. Make sure it starts with http:// or https://"
        except requests.exceptions.Timeout:
            return "The request timed out. The site may be too slow or unresponsive."
        except requests.exceptions.ConnectionError:
            return f"Failed to connect to {url}. Please check if the site is reachable."
        except requests.exceptions.HTTPError as e:
            return f"HTTP error occurred: {e.response.status_code} {e.response.reason}"
        except Exception as e:
            # Deliberate catch-all: the result is handed back to the caller as
            # text, so any other failure is reported rather than propagated.
            return f"An unexpected error occurred: {str(e)}"

    def _clean_html(self, html: str) -> str:
        """Return the visible text of ``html``, one non-empty line per row."""
        soup = BeautifulSoup(html, "html.parser")

        # Drop elements whose contents are not readable page text.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()

        # Trim every line and discard the ones left empty.
        text = soup.get_text(separator="\n")
        stripped = (line.strip() for line in text.splitlines())
        return "\n".join(line for line in stripped if line)
# @tool
# def my_custom_tool(arg1: str) -> str:
# """ Description
# Args:
# arg1: the first argument
# """
# pass
# Model init
# If the agent does not answer, the model may be overloaded; use another model or the following Hugging Face Endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
# Language model handed to the agent below.
model = InferenceClientModel(
    provider="auto",
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    temperature=0.5,
    max_tokens=2096,
    # No token is passed explicitly; per the original note, the HF_TOKEN
    # environment variable is picked up by default:
    # token=os.environ["HF_TOKEN"],
)
# # Import tool from Hub
# image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
# with open("prompts.yaml", 'r') as stream:
# prompt_templates = yaml.safe_load(stream)
# Agent wired to a web search tool and the page fetcher defined above.
# NOTE(review): FinalAnswerTool is imported but not listed here; presumably
# the agent supplies its own final-answer tool. Confirm before relying on it.
agent = CodeAgent(
    tools=[
        DuckDuckGoSearchTool(),
        SiteContentFetcher(),
    ],  # register additional tools here
    model=model,
    verbosity_level=1,
    max_steps=5,
    # Optional settings, currently not passed:
    # grammar=None,
    # planning_interval=None,
    # name=None,
    # description=None,
    # prompt_templates=prompt_templates
)

# Start the Gradio front end for the agent.
GradioUI(agent).launch()