import os
import yaml
import requests
from bs4 import BeautifulSoup
from smolagents import (
    load_tool,
    tool,
    Tool,
    DuckDuckGoSearchTool,
    FinalAnswerTool,
    CodeAgent,
    InferenceClientModel,
)
from Gradio_UI import GradioUI


class SiteContentFetcher(Tool):
    """smolagents Tool that downloads a web page and returns its readable text.

    Intended to be chained after a web-search tool: the agent passes a URL
    found by search, and gets back cleaned, script-free page text (or a
    human-readable error string on failure).
    """

    name = "site_content_fetcher"
    description = (
        "This tool fetches and cleans readable text from the specified URL. Normally used after some web_search_tool."
    )
    inputs = {
        "url": {
            "type": "string",
            "description": "The full URL of the website to fetch content from, including the protocol (http or https).",
        }
    }
    output_type = "string"

    def __init__(self) -> None:
        # Bug fix: the original override skipped super().__init__().
        # smolagents.Tool.__init__ validates the tool's class attributes and
        # marks the instance initialized; subclasses must chain up.
        super().__init__()
        self.MAX_CHARS = 100_000  # Optional: limit size of returned content

    def forward(self, url: str) -> str:
        """Fetch ``url`` and return its cleaned text, truncated to MAX_CHARS.

        Network and HTTP failures are returned as descriptive strings rather
        than raised, so the calling agent can read and react to the error.
        """
        headers = {
            "User-Agent": "Mozilla/5.0 (compatible; SiteContentFetcher/1.0)"
        }
        try:
            response = requests.get(url, headers=headers, timeout=10)
            response.raise_for_status()
            cleaned_text = self._clean_html(response.text)
            return cleaned_text[:self.MAX_CHARS]
        except requests.exceptions.MissingSchema:
            return "Invalid URL format. Make sure it starts with http:// or https://"
        except requests.exceptions.Timeout:
            return "The request timed out. The site may be too slow or unresponsive."
        except requests.exceptions.ConnectionError:
            return f"Failed to connect to {url}. Please check if the site is reachable."
        except requests.exceptions.HTTPError as e:
            return f"HTTP error occurred: {e.response.status_code} {e.response.reason}"
        except Exception as e:
            # Last-resort guard: surface any unexpected failure to the agent
            # as text instead of crashing the agent run.
            return f"An unexpected error occurred: {str(e)}"

    def _clean_html(self, html: str) -> str:
        """Strip non-content tags from ``html`` and return the visible text,
        one non-empty line per text fragment."""
        soup = BeautifulSoup(html, "html.parser")
        # Remove script, style, and noscript tags — they carry no readable text.
        for tag in soup(["script", "style", "noscript"]):
            tag.decompose()
        # Extract text, then drop blank lines and surrounding whitespace.
        text = soup.get_text(separator="\n")
        lines = [line.strip() for line in text.splitlines()]
        cleaned_lines = [line for line in lines if line]
        return "\n".join(cleaned_lines)


# Model init
# If the agent does not answer, the model is overloaded, please use another
# model or the following Hugging Face Endpoint that also contains qwen2.5 coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = InferenceClientModel(
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',
    max_tokens=2096,  # NOTE(review): unusual value — possibly meant 2048; confirm before changing
    temperature=0.5,
    provider="auto",
    # token=os.environ["HF_TOKEN"],  # used this env var by default
)

agent = CodeAgent(
    model=model,
    tools=[DuckDuckGoSearchTool(), SiteContentFetcher()],  # add your tools here (don't remove final answer)
    max_steps=5,
    verbosity_level=1,
)

GradioUI(agent).launch()