import os
import yaml
import requests
from bs4 import BeautifulSoup
from smolagents import (
    load_tool,
    tool,
    Tool,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    CodeAgent,
    InferenceClientModel,
)
from Gradio_UI import GradioUI


class SiteContentFetcher(Tool):
    """smolagents Tool that downloads a web page and returns its readable text.

    Intended to be chained after a web-search tool: the agent passes a URL
    found by search, and gets back cleaned, script-free page text (or a
    human-readable error string on failure).
    """

    name = "site_content_fetcher"
    description = (
        "This tool fetches and cleans readable text from the specified URL. Normally used after some web_search_tool."
    )
    inputs = {
        "url": {
            "type": "string",
            "description": "The full URL of the website to fetch content from, including the protocol (http or https).",
        }
    }
    output_type = "string"

    def __init__(self) -> None:
        # Bug fix: the original override skipped super().__init__().
        # smolagents.Tool.__init__ validates the tool's class attributes and
        # marks the instance initialized; subclasses must chain up.
        super().__init__()
        self.MAX_CHARS = 100_000  # Optional: limit size of returned content

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return its cleaned text, truncated to MAX_CHARS.

        Network and HTTP failures are returned as descriptive strings rather
        than raised, so the calling agent can read and react to the error.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; SiteContentFetcher/1.0)"
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            cleaned_text = self._clean_html(response.text)
            return cleaned_text[:self.MAX_CHARS]
        except requests.exceptions.MissingSchema:
            return "Invalid URL format. Make sure it starts with http:// or https://"
        except requests.exceptions.Timeout:
            return "The request timed out. The site may be too slow or unresponsive."
        except requests.exceptions.ConnectionError:
            return f"Failed to connect to {url}. Please check if the site is reachable."
        except requests.exceptions.HTTPError as e:
            return f"HTTP error occurred: {e.response.status_code} {e.response.reason}"
        except Exception as e:
            # Last-resort guard: surface any unexpected failure to the agent
            # as text instead of crashing the agent run.
            return f"An unexpected error occurred: {str(e)}"

    def _clean_html(self, html: str) -> str:
        """Strip non-content tags from ``html`` and return the visible text,
        one non-empty line per text fragment."""
        soup = BeautifulSoup(html, "html.parser")
        # Remove script, style, and noscript tags — they carry no readable text.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        # Extract text, then drop blank lines and surrounding whitespace.
        text = soup.get_text(separator="\n")
        lines = [line.strip() for line in text.splitlines()]
        cleaned_lines = [line for line in lines if line]
        return "\n".join(cleaned_lines)


# Model init
# If the agent does not answer, the model is overloaded, please use another
# model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = InferenceClientModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    max_tokens=2096,  # NOTE(review): unusual value — possibly meant 2048; confirm before changing
    temperature=0.5,
    provider="auto",
    # token=os.environ["HF_TOKEN"],  # used this env var by default
)

agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool(), SiteContentFetcher()],  # add your tools here (don't remove final answer)
    max_steps=5,
    verbosity_level=1,
)

GradioUI(agent).launch()