"""
Web Search Tool
================
Abstraction layer for web search functionality.
Supports real search via DuckDuckGo HTML scraping or API services,
with fallback to simulated results.
"""

import os
import re
import logging
from typing import Optional
from dataclasses import dataclass

# httpx is an optional dependency: record whether it could be imported so the
# network-backed search strategies can be skipped when it is missing.
try:
    import httpx
except ImportError:
    HTTPX_AVAILABLE = False
else:
    HTTPX_AVAILABLE = True

# Logging is configured at import time (INFO level on the root logger); every
# message emitted by this module goes through the "Searcher" logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("Searcher")


@dataclass
class SearchResult:
    """One search hit: its title, target URL and a short text excerpt."""
    title: str
    url: str
    snippet: str

    def to_dict(self) -> dict[str, str]:
        """Return the hit as a plain dict keyed by title, url and snippet."""
        field_names = ("title", "url", "snippet")
        return {name: getattr(self, name) for name in field_names}


class SearchConfig:
    """Static configuration shared by the search backends."""

    # Name of the environment variable holding the Serper.dev API key
    # (only needed when using the paid service)
    SERPER_API_KEY_ENV = "SERPER_API_KEY"

    # DuckDuckGo HTML endpoint (no API key needed)
    DUCKDUCKGO_HTML_URL = "https://html.duckduckgo.com/html/"

    # Timeout handed to the HTTP client for each request
    REQUEST_TIMEOUT = 10.0

    # Cap on the number of results (a result-count limit, not a request
    # rate limit)
    MAX_RESULTS = 5

    @classmethod
    def get_api_key(cls) -> Optional[str]:
        """
        Read the API key from the environment.

        Returns:
            The raw value of the SERPER_API_KEY_ENV variable, or None when
            the variable is not set. An empty string is returned as-is.
        """
        return os.environ.get(cls.SERPER_API_KEY_ENV)

    @classmethod
    def has_api_key(cls) -> bool:
        """
        Check if a usable API key is configured.

        Returns:
            True only when the environment variable is set to a non-empty
            value. A variable that is set but empty counts as "not
            configured", because an empty key cannot authenticate a request.
        """
        # Truthiness (not ``is not None``) so that SERPER_API_KEY="" is
        # treated the same as an unset variable.
        return bool(cls.get_api_key())


async def search(query: str, max_results: int = 5) -> list[dict[str, str]]:
    """
    Run a web search, falling through backends until one yields results.

    Backends are attempted in this order:
    1. Serper.dev API, only when SearchConfig reports an API key
    2. DuckDuckGo HTML scraping, only when httpx could be imported
    3. Simulated results, returned as the last resort

    A backend that raises is logged as a warning and skipped; a backend
    that returns nothing is skipped silently.

    Args:
        query: The search query string
        max_results: Maximum number of results to return

    Returns:
        List of search result dictionaries with title, url, snippet
    """
    logger.info(f"Searching for: {query}")

    # First choice: the Serper API, when a key is configured
    if SearchConfig.has_api_key():
        try:
            if serper_hits := await _search_serper(query, max_results):
                logger.info(f"Serper returned {len(serper_hits)} results")
                return serper_hits
        except Exception as serper_error:
            logger.warning(f"Serper search failed: {serper_error}")

    # Second choice: scrape DuckDuckGo's HTML endpoint
    if HTTPX_AVAILABLE:
        try:
            if ddg_hits := await _search_duckduckgo(query, max_results):
                logger.info(f"DuckDuckGo returned {len(ddg_hits)} results")
                return ddg_hits
        except Exception as ddg_error:
            logger.warning(f"DuckDuckGo search failed: {ddg_error}")

    # Last resort: canned results, so callers always get an answer
    logger.info("Using simulated search results")
    return _simulate_search(query, max_results)


async def _search_serper(query: str, max_results: int) -> list[dict[str, str]]:
    """
    Query the Serper.dev search API.

    The query is POSTed as JSON together with the API key header, and the
    entries of the response's "organic" list are converted to result dicts
    (the API's "link" field becomes "url"). Fields missing from an entry
    default to an empty string.

    Args:
        query: Search query
        max_results: Max results to return

    Returns:
        List of search results

    Raises:
        RuntimeError: If httpx could not be imported.
        ValueError: If no API key is configured.
    """
    if not HTTPX_AVAILABLE:
        raise RuntimeError("httpx not available")

    api_key = SearchConfig.get_api_key()
    if not api_key:
        raise ValueError("SERPER_API_KEY not set")

    request_headers = {
        "X-API-KEY": api_key,
        "Content-Type": "application/json",
    }
    payload = {"q": query, "num": max_results}

    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
        response = await client.post(
            "https://google.serper.dev/search",
            headers=request_headers,
            json=payload,
        )
        # Error statuses are surfaced as exceptions for the caller to handle
        response.raise_for_status()
        data = response.json()

    organic_hits = data.get("organic", [])[:max_results]
    return [
        {
            "title": hit.get("title", ""),
            "url": hit.get("link", ""),
            "snippet": hit.get("snippet", ""),
        }
        for hit in organic_hits
    ]


async def _search_duckduckgo(query: str, max_results: int) -> list[dict[str, str]]:
    """
    Search using the DuckDuckGo HTML endpoint (no API key needed).

    The query is POSTed to SearchConfig.DUCKDUCKGO_HTML_URL with a
    browser-like User-Agent header, and the returned page is scraped by
    _parse_duckduckgo_html.

    Args:
        query: Search query
        max_results: Max results to return

    Returns:
        List of search results (dicts with title, url, snippet)

    Raises:
        RuntimeError: If httpx could not be imported.
        Exception: Whatever the HTTP client raises while sending the
            request, or raise_for_status() raises for an error response.
    """
    if not HTTPX_AVAILABLE:
        raise RuntimeError("httpx not available")

    async with httpx.AsyncClient(timeout=SearchConfig.REQUEST_TIMEOUT) as client:
        response = await client.post(
            SearchConfig.DUCKDUCKGO_HTML_URL,
            data={"q": query},
            headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
            }
        )
        response.raise_for_status()
        page = response.text

    return _parse_duckduckgo_html(page, max_results)


def _parse_duckduckgo_html(page: str, max_results: int) -> list[dict[str, str]]:
    """
    Extract result dicts from a DuckDuckGo HTML results page.

    Args:
        page: HTML text of the results page
        max_results: Max results to return

    Returns:
        List of dicts with title, url and snippet. Titles and snippets are
        plain text (inline tags removed, HTML entities decoded); snippet is
        an empty string when a result has none.
    """
    # Imported here so the helper brings its own dependencies into scope.
    from html import unescape
    from urllib.parse import unquote

    # Anchor bodies are matched lazily up to the closing tag, so inline
    # markup inside a title or snippet (e.g. bolded query terms) no longer
    # prevents the match.
    title_re = re.compile(
        r'<a[^>]*class="result__a"[^>]*href="([^"]*)"[^>]*>(.*?)</a>',
        re.DOTALL,
    )
    snippet_re = re.compile(
        r'<a[^>]*class="result__snippet"[^>]*>(.*?)</a>',
        re.DOTALL,
    )

    def plain_text(fragment: str) -> str:
        """Remove tags from an HTML fragment and decode its entities."""
        return unescape(re.sub(r"<[^>]+>", "", fragment)).strip()

    title_anchors = list(title_re.finditer(page))
    results = []

    for index, anchor in enumerate(title_anchors[:max_results]):
        url, title = anchor.group(1), anchor.group(2)

        # Only look for the snippet between this title and the next one, so
        # a result without a snippet cannot pick up its neighbour's text.
        if index + 1 < len(title_anchors):
            window_end = title_anchors[index + 1].start()
        else:
            window_end = len(page)
        snippet_match = snippet_re.search(page, anchor.end(), window_end)
        snippet = snippet_match.group(1) if snippet_match else ""

        # Clean up URL: DuckDuckGo links go through a redirect whose "uddg"
        # query parameter carries the percent-encoded target.
        redirect = re.search(r'uddg=([^&]+)', url)
        if redirect:
            url = unquote(redirect.group(1))

        results.append({
            "title": plain_text(title),
            "url": url,
            "snippet": plain_text(snippet)
        })

    return results


def _simulate_search(query: str, max_results: int) -> list[dict[str, str]]:
    """
    Build canned search results for testing or as a last-resort fallback.

    Five fixed templates are filled in with the query; the URLs point at
    example domains and use the query with spaces replaced by hyphens.

    Args:
        query: Search query
        max_results: Max results to return

    Returns:
        List of simulated search results (at most five)
    """
    # Computed once and shared by every template URL
    slug = query.replace(' ', '-')

    # (title, url, snippet) for each canned result, in presentation order
    templates = (
        (
            f"Research findings on {query}",
            f"https://research.example.com/{slug}",
            f"Comprehensive research and analysis on {query}. "
            f"Expert insights and latest developments.",
        ),
        (
            f"Understanding {query}: A Complete Guide",
            f"https://guide.example.org/{slug}",
            f"Everything you need to know about {query}. "
            f"Detailed explanations and practical examples.",
        ),
        (
            f"Latest developments in {query}",
            f"https://news.example.com/topics/{slug}",
            f"Stay updated with the latest news about {query}. "
            f"Breaking stories and expert commentary.",
        ),
        (
            f"{query} - Academic perspectives",
            f"https://academic.example.edu/{slug}",
            f"Academic research and peer-reviewed studies on {query}. "
            f"Citations and methodology included.",
        ),
        (
            f"Practical applications of {query}",
            f"https://apply.example.io/{slug}",
            f"How to apply {query} in real-world scenarios. "
            f"Case studies and implementation guides.",
        ),
    )

    return [
        {"title": title, "url": url, "snippet": snippet}
        for title, url, snippet in templates[:max_results]
    ]


# Synchronous wrapper for non-async contexts
def search_sync(query: str, max_results: int = 5) -> list[dict[str, str]]:
    """
    Blocking counterpart of search for callers outside async code.

    No search backend is contacted: the results always come straight from
    the simulated-result generator.
    """
    simulated = _simulate_search(query, max_results)
    return simulated