# modules/web_search.py — AKIRA V19 (Dezembro 2025)
"""
Módulo de busca na web para APIs sem acesso nativo:
- Busca notícias de Angola (WebScraping)
- Busca geral (DuckDuckGo API - gratuita)
- Pesquisa de clima/tempo
- Cache de 15 minutos
"""
import re
import time
from typing import List, Dict, Any, Optional
from urllib.parse import quote, urljoin

import requests
from bs4 import BeautifulSoup
from loguru import logger

# === SETTINGS ===
CACHE_TTL = 900  # cache entry lifetime in seconds (15 minutes)

class SimpleCache:
    """Simple in-memory key/value cache whose entries expire after a TTL.

    Entries are stored as ``(value, timestamp)`` pairs. Timestamps come from
    ``time.monotonic()`` so that system clock adjustments cannot extend or
    shorten an entry's lifetime. Expired entries are dropped both when they
    are read and whenever a new value is stored, so keys that are never
    requested again do not accumulate indefinitely.
    """

    def __init__(self, ttl: int = CACHE_TTL):
        """Create an empty cache.

        Args:
            ttl: Entry lifetime in seconds.
        """
        self.ttl = ttl
        # key -> (value, monotonic timestamp taken when the value was stored)
        self._data: Dict[str, Any] = {}

    def get(self, key: str):
        """Return the cached value for *key*, or ``None`` if absent/expired."""
        entry = self._data.get(key)
        if entry is None:
            return None
        value, timestamp = entry
        if time.monotonic() - timestamp < self.ttl:
            return value
        # Expired: forget it so the next lookup is a plain miss.
        self._data.pop(key, None)
        return None

    def set(self, key: str, value: Any):
        """Store *value* under *key*, stamping it with the current time."""
        now = time.monotonic()
        self._purge_expired(now)
        self._data[key] = (value, now)

    def _purge_expired(self, now: float) -> None:
        """Remove every entry whose age has reached the TTL."""
        # Iterate over a snapshot so the dict can be modified while scanning.
        for key, (_, timestamp) in list(self._data.items()):
            if now - timestamp >= self.ttl:
                self._data.pop(key, None)


class WebSearch:
    """
    Gerenciador de buscas na web:
    - Notícias de Angola (scraping)
    - Busca geral (DuckDuckGo)
    - Clima/tempo
    """
    
    def __init__(self):
        """Set up the HTTP session, the result cache and the news sources."""
        cabecalhos = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
            "Accept-Language": "pt-BR,pt;q=0.9,en-US;q=0.8,en;q=0.7"
        }
        sessao = requests.Session()
        sessao.headers.update(cabecalhos)

        self.cache = SimpleCache(ttl=CACHE_TTL)
        self.session = sessao

        # Angolan news sources; the scraper methods pick them by position.
        angop = "https://www.angop.ao/ultimas"
        novo_jornal = "https://www.novojornal.co.ao/"
        jornal_de_angola = "https://www.jornaldeangola.ao/"
        self.fontes_angola = [angop, novo_jornal, jornal_de_angola]
    
    # ========================================================================
    # BUSCA GERAL (MULTI-FONTE - GRATUITA E ROBUSTA)
    # ========================================================================

    def buscar_geral(self, query: str, max_resultados: int = 3) -> str:
        """Run a general web lookup and format the findings for the AI prompt.

        Sources are tried in order: the DuckDuckGo Instant Answer API first,
        then the English Wikipedia page-summary endpoint when DuckDuckGo
        produced fewer than ``max_resultados`` entries. A failure in one
        source is logged and does not prevent the other from being tried.

        Args:
            query: Search term.
            max_resultados: Maximum number of entries in the formatted answer.

        Returns:
            A header line followed by up to ``max_resultados`` entries, or
            the generic fallback text when the query is blank, no source
            produced anything, or an unexpected error occurred. Only
            successful answers are cached.
        """
        termo = (query or "").strip()
        if not termo:
            # Nothing to search for; avoid pointless network round-trips.
            return self._fallback_busca_geral(query)

        # The result count is part of the key: an answer built for a small
        # limit must not be served to a caller asking for more entries.
        cache_key = f"busca_geral_{termo.lower()}_{max_resultados}"
        cached = self.cache.get(cache_key)
        if cached:
            return cached

        try:
            resultados: List[str] = []

            # 1. DuckDuckGo Instant Answer
            try:
                resp = self.session.get(
                    "https://api.duckduckgo.com/",
                    params={
                        "q": termo,
                        "format": "json",
                        "no_html": "1",
                        "skip_disambig": "1",
                    },
                    timeout=8,
                )
                if resp.status_code == 200:
                    data = resp.json()

                    # Abstract (main summary)
                    if data.get("Abstract"):
                        resultados.append(f"RESUMO: {data['Abstract'][:300]}")

                    # Related topics
                    topicos = data.get("RelatedTopics") or []
                    for topic in topicos[:max_resultados]:
                        if isinstance(topic, dict) and "Text" in topic:
                            resultados.append(f"INFO: {topic['Text'][:200]}")
                        elif isinstance(topic, str):
                            resultados.append(f"INFO: {topic[:200]}")
            except Exception as e:
                logger.debug(f"DuckDuckGo falhou: {e}")

            # 2. Wikipedia summary, only when there is still room for entries
            if len(resultados) < max_resultados:
                try:
                    # Percent-encode the title so characters such as "/",
                    # "?" or "#" stay inside the path segment.
                    titulo = quote(termo.replace(" ", "_"), safe="")
                    wiki_resp = self.session.get(
                        "https://en.wikipedia.org/api/rest_v1/page/summary/" + titulo,
                        timeout=5,
                    )
                    if wiki_resp.status_code == 200:
                        wiki_data = wiki_resp.json()
                        if wiki_data.get("extract"):
                            resultados.append(f"Wikipedia: {wiki_data['extract'][:250]}")
                except Exception as e:
                    logger.debug(f"Wikipedia falhou: {e}")

            # 3. Nothing found anywhere: generic fallback (not cached)
            if not resultados:
                return self._fallback_busca_geral(termo)

            # Formatted for the AI prompt, not for the end user
            resposta = (
                f"INFORMAÇÕES SOBRE '{termo.upper()}':\n\n"
                + "\n\n".join(resultados[:max_resultados])
            )
            self.cache.set(cache_key, resposta)
            return resposta

        except Exception as e:
            # Last-resort guard so callers never see an exception from here.
            logger.warning(f"Busca geral falhou: {e}")
            return self._fallback_busca_geral(termo)

    def _fallback_busca_geral(self, query: str) -> str:
        """Build the generic answer used when no source returned any data."""
        cabecalho = f"INFORMAÇÕES GERAIS SOBRE '{query}': "
        orientacao = (
            "Não foi possível obter dados específicos da web no momento. "
            "Use conhecimento geral para responder."
        )
        return cabecalho + orientacao
    
    # ========================================================================
    # NOTÍCIAS DE ANGOLA (WEB SCRAPING)
    # ========================================================================
    
    def pesquisar_noticias_angola(self, limite: int = 5) -> str:
        """Collect recent Angolan headlines from the scraped news sources.

        Each source is queried independently, so an error in one of them is
        logged and the remaining sources are still tried. Headlines are
        de-duplicated case-insensitively, titles of 20 characters or fewer
        are discarded, and at most ``limite`` items are kept.

        Args:
            limite: Maximum number of headlines in the answer.

        Returns:
            A formatted, numbered list of headlines (with links when known),
            or a fixed "no news" message when nothing usable was found. Both
            outcomes are cached.
        """
        # The limit is part of the key: a short list cached for a small
        # limit must not be returned to a caller asking for more items.
        cache_key = f"noticias_angola_{limite}"
        cached = self.cache.get(cache_key)
        if cached:
            return cached

        todas_noticias: List[Dict] = []
        fontes = (
            self._buscar_angop,
            self._buscar_novojornal,
            self._buscar_jornaldeangola,
        )
        for buscar in fontes:
            try:
                todas_noticias.extend(buscar())
            except Exception as e:
                logger.error(f"Erro no scraping de notícias: {e}")

        # De-duplicate by lower-cased title and cap at ``limite``
        vistos = set()
        unicas = []
        for n in todas_noticias:
            if len(unicas) >= limite:
                break
            titulo_lower = (n.get("titulo") or "").lower()
            if titulo_lower in vistos or len(titulo_lower) <= 20:
                continue
            vistos.add(titulo_lower)
            unicas.append(n)

        if not unicas:
            fallback = "Sem notícias recentes de Angola disponíveis no momento."
            self.cache.set(cache_key, fallback)
            return fallback

        # One text block per headline: title line plus optional link line
        blocos = []
        for i, n in enumerate(unicas, 1):
            linhas = [f"[{i}] {n['titulo']}"]
            if n.get('link'):
                linhas.append(f"    🔗 {n['link']}")
            blocos.append("\n".join(linhas))

        texto = ("📰 NOTÍCIAS RECENTES DE ANGOLA:\n\n" + "\n\n".join(blocos)).strip()
        self.cache.set(cache_key, texto)
        return texto
    
    def _buscar_angop(self) -> List[Dict]:
        """Scrape up to three headlines from the Angop listing page.

        Returns:
            A list of dicts with the keys ``titulo``, ``link`` and ``fonte``.
            The list is empty when the page does not answer with HTTP 200 or
            when any error occurs (errors are logged, never raised).
        """
        try:
            base_url = self.fontes_angola[0]
            r = self.session.get(base_url, timeout=8)
            if r.status_code != 200:
                return []

            soup = BeautifulSoup(r.text, 'html.parser')
            noticias = []

            for item in soup.select('.ultimas-noticias .item')[:3]:
                titulo = item.select_one('h3 a')
                link = item.select_one('a')
                if titulo is None or link is None:
                    continue
                href = str(link.get('href') or '').strip()
                noticias.append({
                    "titulo": self._limpar_texto(titulo.get_text()),
                    # urljoin resolves root-relative, path-relative and
                    # protocol-relative hrefs against the page URL and
                    # leaves absolute URLs untouched.
                    "link": urljoin(base_url, href) if href else "",
                    "fonte": "Angop"
                })

            return noticias

        except Exception as e:
            logger.warning(f"Angop scraping falhou: {e}")
            return []
    
    def _buscar_novojornal(self) -> List[Dict]:
        """Scrape up to three headlines from the Novo Jornal front page.

        Returns:
            A list of dicts with the keys ``titulo``, ``link`` and ``fonte``.
            The list is empty when the page does not answer with HTTP 200 or
            when any error occurs (errors are logged, never raised).
        """
        try:
            base_url = self.fontes_angola[1]
            r = self.session.get(base_url, timeout=8)
            if r.status_code != 200:
                return []

            soup = BeautifulSoup(r.text, 'html.parser')
            noticias = []

            for a in soup.select('.noticia-lista .titulo a')[:3]:
                href = str(a.get('href') or '').strip()
                noticias.append({
                    "titulo": self._limpar_texto(a.get_text()),
                    # Resolve relative hrefs against the page URL so the
                    # link is usable outside the site; keep "" when absent.
                    "link": urljoin(base_url, href) if href else "",
                    "fonte": "Novo Jornal"
                })

            return noticias

        except Exception as e:
            logger.warning(f"Novo Jornal scraping falhou: {e}")
            return []
    
    def _buscar_jornaldeangola(self) -> List[Dict]:
        """Scrape up to three headlines from the Jornal de Angola front page.

        Returns:
            A list of dicts with the keys ``titulo``, ``link`` and ``fonte``.
            The list is empty when the page does not answer with HTTP 200 or
            when any error occurs (errors are logged, never raised).
        """
        try:
            base_url = self.fontes_angola[2]
            r = self.session.get(base_url, timeout=8)
            if r.status_code != 200:
                return []

            soup = BeautifulSoup(r.text, 'html.parser')
            noticias = []

            for a in soup.select('.ultimas .titulo a')[:3]:
                href = str(a.get('href') or '').strip()
                noticias.append({
                    "titulo": self._limpar_texto(a.get_text()),
                    # Resolve relative hrefs against the page URL so the
                    # link is usable outside the site; keep "" when absent.
                    "link": urljoin(base_url, href) if href else "",
                    "fonte": "Jornal de Angola"
                })

            return noticias

        except Exception as e:
            logger.warning(f"Jornal de Angola scraping falhou: {e}")
            return []
    
    # ========================================================================
    # CLIMA/TEMPO
    # ========================================================================
    
    def buscar_clima(self, cidade: str = "Luanda") -> str:
        """Fetch current weather for a city from wttr.in (JSON ``j1`` format).

        Args:
            cidade: City name. Surrounding whitespace is ignored and a blank
                value falls back to the default, "Luanda".

        Returns:
            A formatted text with temperature, condition and humidity, or an
            apology message when the service does not answer with HTTP 200
            or the response cannot be processed. Only successful answers
            are cached.
        """
        cidade = (cidade or "").strip() or "Luanda"

        cache_key = f"clima_{cidade.lower()}"
        cached = self.cache.get(cache_key)
        if cached:
            return cached

        try:
            # Percent-encode the city so spaces, "/", "?" or "#" stay inside
            # the path segment instead of altering the request URL.
            url = f"https://wttr.in/{quote(cidade, safe='')}?format=j1"
            resp = self.session.get(url, timeout=8)

            if resp.status_code != 200:
                return f"Não consegui obter informações do clima em {cidade}."

            data = resp.json()

            current = data['current_condition'][0]
            temp = current['temp_C']
            # Prefer the Portuguese description; fall back to the default
            # one when it is missing or empty.
            descricoes = current.get('lang_pt') or current['weatherDesc']
            desc = descricoes[0]['value']
            humidity = current['humidity']

            resposta = "\n".join([
                f"🌤️ CLIMA EM {cidade.upper()}:\n",
                f"Temperatura: {temp}°C",
                f"Condição: {desc}",
                f"Umidade: {humidity}%",
            ])

            self.cache.set(cache_key, resposta)
            return resposta

        except Exception as e:
            logger.warning(f"Busca de clima falhou: {e}")
            return f"Não consegui obter informações do clima em {cidade} no momento."
    
    # ========================================================================
    # UTILIDADES
    # ========================================================================
    
    def _limpar_texto(self, texto: str) -> str:
        """Collapse whitespace runs into single spaces, trim, cap at 200 chars."""
        tamanho_maximo = 200
        if texto:
            compactado = re.sub(r'[\s\n\t]+', ' ', texto).strip()
            return compactado[:tamanho_maximo]
        return ""
    
    # ========================================================================
    # DETECÇÃO DE INTENÇÃO DE BUSCA
    # ========================================================================
    
    @staticmethod
    def detectar_intencao_busca(mensagem: str) -> Optional[str]:
        """
        Detecta se mensagem requer busca na web - MELHORADO

        Returns:
            "noticias" | "clima" | "busca_geral" | None
        """
        msg_lower = mensagem.lower()

        # PALAVRAS-CHAVE DE BUSCA DIRETAS (PRIORIDADE ALTA)
        palavras_busca_diretas = [
            "busca", "pesquisa", "pesquisar", "procurar", "procura",
            "web", "internet", "google", "wikipedia", "site",
            "informações", "dados", "saber", "conhecer", "descobrir",
            "encontrar", "localizar", "achar"
        ]

        # Verificar se contém palavras de busca diretas
        for palavra in palavras_busca_diretas:
            if palavra in msg_lower:
                # Se for sobre clima, priorizar clima
                if any(k in msg_lower for k in ["clima", "tempo", "temperatura", "chuva", "sol"]):
                    return "clima"
                # Se for sobre notícias, priorizar notícias
                elif any(k in msg_lower for k in ["notícias", "noticias", "novidades", "aconteceu", "news"]):
                    if "angola" in msg_lower or "angolano" in msg_lower:
                        return "noticias"
                    else:
                        return "busca_geral"
                else:
                    return "busca_geral"

        # Notícias (específicas de Angola)
        if any(k in msg_lower for k in ["notícias", "noticias", "novidades", "aconteceu", "news"]):
            if "angola" in msg_lower or "angolano" in msg_lower or "angola" in msg_lower:
                return "noticias"

        # Clima
        if any(k in msg_lower for k in ["clima", "tempo", "temperatura", "chuva", "sol"]):
            return "clima"

        # Busca geral (perguntas sobre fatos/eventos)
        palavras_chave_busca = [
            "quem é", "o que é", "onde fica", "quando foi", "como funciona",
            "definição", "significa", "história", "explicação", "significado",
            "qual é", "quais são", "quanto é", "quantos são"
        ]

        if any(k in msg_lower for k in palavras_chave_busca):
            return "busca_geral"

        # Perguntas com "?" também podem ativar busca (mais seletivo)
        if "?" in mensagem:
            palavras = mensagem.split()
            if len(palavras) > 2:  # Pelo menos 3 palavras para considerar busca
                # Verificar se é uma pergunta factual
                indicadores_pergunta = ["quem", "o que", "onde", "quando", "como", "por que", "qual", "quanto", "porquê", "porque"]
                if any(indicador in msg_lower for indicador in indicadores_pergunta):
                    return "busca_geral"

        return None


# === GLOBAL INSTANCE (SINGLETON) ===
_web_search_instance = None


def get_web_search() -> WebSearch:
    """Return the shared WebSearch object, creating it on the first call."""
    global _web_search_instance
    if _web_search_instance is not None:
        return _web_search_instance
    _web_search_instance = WebSearch()
    return _web_search_instance