# Hugging Face Space application entry point.
# (The original "Spaces: Sleeping" lines here were page-scrape residue, not code.)
import os

import requests
import yaml
from bs4 import BeautifulSoup
from smolagents import (
    CodeAgent,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    InferenceClientModel,
    Tool,
    load_tool,
    tool,
)

from Gradio_UI import GradioUI
class SiteContentFetcher(Tool):
    """smolagents tool that downloads a web page and returns its readable text.

    Network and HTTP failures are returned as human-readable strings instead
    of being raised, so the calling agent can read the error and react.
    """

    name = "site_content_fetcher"
    description = (
        "This tool fetches and cleans readable text from the specified URL. Normally used after some web_search_tool."
    )
    inputs = {
        "url": {
            "type": "string",
            "description": "The full URL of the website to fetch content from, including the protocol (http or https).",
        }
    }
    output_type = "string"

    # Cap on returned characters so huge pages don't flood the agent context.
    # Class-level constant; still reachable as self.MAX_CHARS.
    MAX_CHARS = 100_000

    def __init__(self):
        # BUG FIX: the original omitted this call. smolagents' Tool.__init__
        # performs attribute validation/registration; skipping it breaks
        # tool setup on current smolagents versions.
        super().__init__()

    def forward(self, url: str) -> str:
        """Fetch *url* and return its cleaned text, truncated to MAX_CHARS.

        Args:
            url: Full URL including the http:// or https:// scheme.

        Returns:
            Cleaned page text on success, otherwise a descriptive error string.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; SiteContentFetcher/1.0)"
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            cleaned_text = self._clean_html(response.text)
            return cleaned_text[:self.MAX_CHARS]
        except requests.exceptions.MissingSchema:
            return "Invalid URL format. Make sure it starts with http:// or https://"
        except requests.exceptions.Timeout:
            return "The request timed out. The site may be too slow or unresponsive."
        except requests.exceptions.ConnectionError:
            return f"Failed to connect to {url}. Please check if the site is reachable."
        except requests.exceptions.HTTPError as e:
            return f"HTTP error occurred: {e.response.status_code} {e.response.reason}"
        except Exception as e:
            # Last-resort guard so the agent always receives a string.
            return f"An unexpected error occurred: {str(e)}"

    def _clean_html(self, html: str) -> str:
        """Strip non-content tags from *html* and return the visible text.

        Script/style/noscript elements are removed, then the remaining text
        is split into lines, stripped, and blank lines are dropped.
        """
        soup = BeautifulSoup(html, "html.parser")
        # Remove script, style, and noscript tags
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        # Extract and clean text
        text = soup.get_text(separator="\n")
        lines = [line.strip() for line in text.splitlines()]
        cleaned_lines = [line for line in lines if line]
        return "\n".join(cleaned_lines)
# Template for adding a custom @tool-decorated function:
# @tool
# def my_custom_tool(arg1: str) -> str:
#     """ Description
#     Args:
#         arg1: the first argument
#     """
#     pass
# --- Model initialization -------------------------------------------------
# If the agent does not answer, the model is overloaded; use another model,
# or the following Hugging Face endpoint that also hosts qwen2.5 coder:
#   model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = InferenceClientModel(
    model_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    max_tokens=2096,
    temperature=0.5,
    provider="auto",
    # token is read from the HF_TOKEN environment variable by default
)
# # Import tool from Hub
# image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# with open("prompts.yaml", 'r') as stream:
#     prompt_templates = yaml.safe_load(stream)

# FIX: FinalAnswerTool is imported at the top of the file and the inline
# comment below warns not to remove the final-answer tool, yet it was
# missing from the tools list — restore it per the course template.
agent = CodeAgent(
    model=model,
    tools=[FinalAnswerTool(), DuckDuckGoSearchTool(), SiteContentFetcher()],  ## add your tools here (don't remove final answer)
    max_steps=5,
    verbosity_level=1,
    # grammar=None,
    # planning_interval=None,
    # name=None,
    # description=None,
    # prompt_templates=prompt_templates
)
if __name__ == "__main__":
    # Launch the Gradio UI only when run as a script, so the module can be
    # imported (e.g. by tests or tooling) without starting a web server.
    GradioUI(agent).launch()