"""
Custom Tools for GAIA Benchmark Agent

Working tools:
1. wikipedia_search - Search Wikipedia for factual information
2. fetch_url_content - Fetch and extract text from web pages
"""

import os
import requests
from smolagents import tool
from bs4 import BeautifulSoup


@tool
def wikipedia_search(query: str, lang: str = "en") -> str:
    """Searches Wikipedia and returns the text of the most relevant article.

    Args:
        query: The search query (e.g., "Mercedes Sosa discography")
        lang: Language code for Wikipedia (default: "en")

    Returns:
        The article title and its plain-text content (truncated to 8000 characters), or an error message if not found.
    """
    max_chars = 8000

    try:
        if not query or not query.strip():
            return "ERROR: Wikipedia search failed - empty query"

        # `lang` becomes part of the request hostname, so only accept the
        # characters that appear in Wikipedia language subdomains
        # (e.g. "en", "simple", "zh-min-nan"). Anything else could redirect
        # the request to an arbitrary host.
        if not lang or not all(
            ch.isascii() and (ch.isalnum() or ch == "-") for ch in lang
        ):
            return f"ERROR: Invalid Wikipedia language code: {lang!r}"

        search_url = f"https://{lang}.wikipedia.org/w/api.php"
        headers = {
            "User-Agent": "GAIABenchmarkAgent/1.0 (Educational project)"
        }

        # Step 1: full-text search, keeping only the best match.
        search_params = {
            "action": "query",
            "list": "search",
            "srsearch": query,
            "format": "json",
            "srlimit": 1,
        }
        response = requests.get(search_url, params=search_params, headers=headers, timeout=10)
        response.raise_for_status()
        search_data = response.json()

        search_results = search_data.get("query", {}).get("search", [])
        if not search_results:
            return f"No Wikipedia article found for: {query}"

        page_title = search_results[0]["title"]

        # Step 2: fetch the plain-text extract of that page.
        # NOTE: MediaWiki boolean parameters are true whenever they are
        # present, whatever their value. Sending `exintro=False` would
        # therefore restrict the extract to the intro section, so the key is
        # omitted entirely to request the whole article.
        content_params = {
            "action": "query",
            "titles": page_title,
            "prop": "extracts",
            "explaintext": 1,
            "format": "json",
            "exsectionformat": "plain",
        }
        response = requests.get(search_url, params=content_params, headers=headers, timeout=10)
        response.raise_for_status()
        content_data = response.json()

        pages = content_data.get("query", {}).get("pages", {})
        if not pages:
            return f"Could not retrieve content for: {page_title}"

        # The result is keyed by page id; a single title was requested, so
        # take the first (only) entry.
        page = next(iter(pages.values()))
        extract = page.get("extract", "")

        if not extract:
            return f"Wikipedia article '{page_title}' has no text content."

        # Keep the tool output bounded.
        if len(extract) > max_chars:
            extract = extract[:max_chars] + "\n\n[Content truncated...]"

        return f"Wikipedia: {page_title}\n\n{extract}"

    except requests.exceptions.RequestException as e:
        return f"ERROR: Failed to search Wikipedia - {str(e)}"
    except Exception as e:
        # Tool boundary: report failures as text rather than raising.
        return f"ERROR: Wikipedia search failed - {str(e)}"


@tool
def fetch_url_content(url: str) -> str:
    """Fetches and extracts text content from a given URL.

    Args:
        url: The URL to fetch content from

    Returns:
        The extracted text content from the webpage (truncated to 5000 characters), or an error message.
    """
    max_chars = 5000
    # Media types that cannot be meaningfully parsed as HTML/text; running
    # them through the HTML parser would only yield garbage output.
    binary_prefixes = ("image/", "audio/", "video/", "font/")
    binary_types = ("application/pdf", "application/zip")

    try:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()

        # Reject only responses that explicitly declare a binary media type;
        # a missing or unknown Content-Type is still parsed best-effort.
        content_type = response.headers.get("Content-Type", "")
        media_type = content_type.split(";", 1)[0].strip().lower()
        if media_type.startswith(binary_prefixes) or media_type in binary_types:
            return f"ERROR: Unsupported content type '{media_type}' at {url}"

        soup = BeautifulSoup(response.content, 'html.parser')

        # Remove non-content elements
        for element in soup(["script", "style", "nav", "header", "footer"]):
            element.decompose()

        text = soup.get_text()

        # Clean up whitespace: strip each line, break lines on double-space
        # gaps, and drop the empty fragments.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        text = '\n'.join(chunk for chunk in chunks if chunk)

        # Keep the tool output bounded.
        if len(text) > max_chars:
            text = text[:max_chars] + "\n\n[Content truncated]"

        return f"Content from {url}:\n\n{text}"

    except requests.exceptions.RequestException as e:
        return f"ERROR: Failed to fetch URL - {str(e)}"
    except Exception as e:
        # Tool boundary: report failures as text rather than raising.
        return f"ERROR: {str(e)}"


# Tools exported from this module, in registration order, for use in agent.py
custom_tools = [wikipedia_search, fetch_url_content]