"""
Toolset for web search and web access.
"""

from typing import Any
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from jinja2 import Template

from .registry import register_tool, register_toolset_desc

# Register the description for the "web" toolset via the registry module.
# The tools defined below attach themselves to this same "web" toolset name.
register_toolset_desc(
    "web",
    "Web toolset for searching the web and fetching content from URLs. Should be only used if it is indeed needed.",
)

# Jinja2 template used by web_search() to render its output as plain text.
# Expects two variables:
#   query   - the search query string, echoed in the header line
#   results - an iterable of mappings; each one is rendered as a
#             "=== Web Result N ===" section (N is the 1-based loop index)
#             followed by one "key: value" line per item in the mapping.
# The "{%- endfor %}" tag strips the whitespace preceding it, so the indentation
# inside the template does not leak after each "key: value" line.
WEB_SEARCH_TEMPLATE: Template = Template(
    """\
Results for web search query '{{ query }}':

{% for result in results %}
=== Web Result {{ loop.index }} ===
    {% for key, value in result.items() %}
{{ key }}: {{ value }}
    {%- endfor %}
{% endfor %}
"""
)


@register_tool(
    "web",
    {
        "type": "function",
        "function": {
            "name": "web_search",
            "description": "Perform a general web search and return the top results.",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "The search query to perform"},
                    "max_results": {
                        "type": "integer",
                        "description": "Maximum number of search results to return",
                        "default": 5,
                    },
                },
                "required": ["query"],
            },
        },
    },
)
def web_search(query: str, max_results: int = 5) -> str:
    """
    Run a web search through DDGS and format the hits as plain text.

    Args:
        query: The search query to perform
        max_results: Maximum number of search results to return

    Returns:
        The search results rendered with WEB_SEARCH_TEMPLATE, or a string
        starting with "web_search tool error: " if the search or the
        rendering raised an exception.
    """
    # Imported lazily so the dependency is only loaded when the tool is used.
    from ddgs import DDGS

    try:
        hits: list[dict[str, Any]] = DDGS().text(query, max_results=max_results)
        return WEB_SEARCH_TEMPLATE.render(query=query, results=hits)
    except Exception as exc:
        # Failures are reported to the caller as text rather than raised.
        return f"web_search tool error: {exc!s}"


@register_tool(
    "web",
    {
        "type": "function",
        "function": {
            "name": "get_url_content",
            "description": "Fetch and extract textual content from a web URL. Supports HTML and other textual content, but excludes binary files like PDFs, ZIP files, etc.",
            "parameters": {
                "type": "object",
                "properties": {
                    "url": {"type": "string", "description": "The URL to fetch content from"},
                    "timeout": {
                        "type": "integer",
                        "description": "Request timeout in seconds",
                        "default": 10,
                    },
                },
                "required": ["url"],
            },
        },
    },
)
def get_url_content(url: str, timeout: int = 10) -> str:
    """
    Fetch textual content from a web URL.

    The response is streamed so that the Content-Type header can be inspected
    before the body is downloaded; binary payloads are rejected without
    pulling the whole file into memory.

    Args:
        url: The URL to fetch content from
        timeout: Request timeout in seconds

    Returns:
        For HTML: the page title and the visible text, with scripts, styles
        and blank lines removed. For other textual types (text/*, JSON, XML):
        the raw response text. For unrecognised content types: a warning
        followed by at most the first 5000 characters of the response text.
        Any failure (invalid URL, binary content, timeout, HTTP or network
        error) is returned as an error message string; nothing is raised.
    """
    try:
        # Parse URL to validate it
        parsed_url = urlparse(url)
        if not parsed_url.scheme or not parsed_url.netloc:
            return f"Error: Invalid URL format: {url}"

        # Set headers to mimic a browser request
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

        # stream=True defers the body download until .content/.text is read,
        # so the content-type check below runs on the headers alone. The
        # context manager releases the connection on every return path.
        with requests.get(url, headers=headers, timeout=timeout, stream=True) as response:
            response.raise_for_status()

            # Check content type to ensure it's textual
            content_type = response.headers.get("content-type", "").lower()

            # Reject binary file types before any of the body is fetched
            binary_types = ("pdf", "zip", "rar", "7z", "tar", "gz", "exe", "dmg", "pkg", "deb", "rpm")
            if any(binary_type in content_type for binary_type in binary_types):
                return f"Error: Binary content type detected ({content_type}). Only textual content is supported."

            # Handle HTML content
            if "text/html" in content_type:
                soup = BeautifulSoup(response.content, "html.parser")

                # Remove script and style elements so their source is not
                # mixed into the extracted text
                for element in soup(["script", "style"]):
                    element.decompose()

                # Extract title
                title = soup.find("title")
                title_text = title.get_text().strip() if title else "No title"

                # Extract main content
                text = soup.get_text()

                # Clean up whitespace: trim each line, split on double spaces,
                # and drop the empty fragments
                lines = (line.strip() for line in text.splitlines())
                chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
                text = "\n".join(chunk for chunk in chunks if chunk)

                return f"Title: {title_text}\n\nContent:\n{text}"

            # Handle other textual content types
            if any(
                text_type in content_type
                for text_type in ("text/", "application/json", "application/xml")
            ):
                return f"Content from {url}:\n\n{response.text}"

            # Unknown content type: best-effort decode, truncated to keep the
            # output bounded. Decode once; .text re-decodes on every access.
            body_text = response.text
            ellipsis = "..." if len(body_text) > 5000 else ""
            return f"Warning: Unknown content type ({content_type}). Attempting to extract as text:\n\n{body_text[:5000]}{ellipsis}"

    except requests.exceptions.Timeout:
        return f"Error: Request timeout after {timeout} seconds for URL: {url}"
    except requests.exceptions.RequestException as e:
        return f"Error fetching URL {url}: {str(e)}"
    except Exception as e:
        return f"Unexpected error processing URL {url}: {str(e)}"