Spaces:

akra35567
/

OLLAMA

Sleeping

File size: 3,896 Bytes

51e76e2

# web_search.py — V27 — SERPER API + ANGOLA SCRAPING
import time
import re
import requests
from typing import List, Dict
from loguru import logger
from bs4 import BeautifulSoup
import os
import config

class SimpleCache:
    """Minimal in-memory cache whose entries expire after a fixed TTL.

    Entries are stored in ``_data`` as ``(value, stored_at)`` pairs. Expired
    entries are removed when they are looked up and whenever a new entry is
    stored, so the cache cannot grow without bound from stale keys.
    """

    def __init__(self, ttl: int = 900):
        """Create an empty cache.

        Args:
            ttl: Lifetime of an entry, in seconds.
        """
        self.ttl = ttl
        self._data = {}

    def get(self, key):
        """Return the value stored under ``key``, or ``None`` if absent or expired."""
        entry = self._data.get(key)
        if entry is None:
            return None
        value, stored_at = entry
        if time.monotonic() - stored_at < self.ttl:
            return value
        # The entry outlived its TTL: evict it instead of keeping dead data.
        self._data.pop(key, None)
        return None

    def set(self, key, value):
        """Store ``value`` under ``key``, stamping it with the current time."""
        # A monotonic clock keeps TTL arithmetic immune to wall-clock jumps.
        now = time.monotonic()
        # Purge stale entries here too; keys that are never looked up again
        # would otherwise stay in memory forever.
        expired = [k for k, (_, stored_at) in self._data.items() if now - stored_at >= self.ttl]
        for stale_key in expired:
            del self._data[stale_key]
        self._data[key] = (value, now)

class WebSearch:
    """Fetch short text snippets from the web to answer a chat message.

    Two sources are used: headlines scraped from a fixed list of Angolan news
    sites, and general web search through the Serper API. Successful results
    are stored in ``self.cache`` so repeated questions do not hit the network.
    """

    # Substrings (matched against the lowercased message) that route a
    # message to the Angolan news scraper.
    _ANGOLA_KEYWORDS = ("angola", "luanda", "notícia", "jornal", "governo", "presidente")
    # Substrings that route a message to the general Serper search.
    _QUESTION_KEYWORDS = (
        "quem é", "o que é", "quando", "onde", "como", "por que", "quanto", "qual",
    )
    # Maximum number of headlines included in the news summary.
    _MAX_NOTICIAS = 5

    def __init__(self):
        """Set up the result cache, the shared HTTP session and the news sources."""
        self.cache = SimpleCache()
        self.session = requests.Session()
        # Browser-like User-Agent sent with every request made through the session.
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        })
        self.fontes_angola = [
            "https://www.angop.ao/ultimas",
            "https://www.novojornal.co.ao/",
            "https://www.jornaldeangola.ao/",
            "https://www.verangola.net/va/noticias"
        ]

    def _limpar(self, texto: str) -> str:
        """Collapse whitespace runs to single spaces, trim, and cap at 200 chars."""
        return re.sub(r'\s+', ' ', texto).strip()[:200]

    def _scraping_angola(self) -> str:
        """Return a bullet list of headlines scraped from ``self.fontes_angola``.

        Each source contributes at most its first three matching links, and
        only titles longer than 20 characters are kept. Sources that fail or
        answer with a non-200 status are skipped. The final text (including
        the "no news" fallback) is cached under a fixed key.

        Returns:
            The cached or freshly built news summary.
        """
        key = "noticias_angola"
        cached = self.cache.get(key)
        if cached:
            return cached

        noticias = []
        for url in self.fontes_angola:
            # Only the first _MAX_NOTICIAS headlines are reported, so further
            # requests would not change the output.
            if len(noticias) >= self._MAX_NOTICIAS:
                break
            try:
                r = self.session.get(url, timeout=8)
                if r.status_code != 200:
                    continue
                soup = BeautifulSoup(r.text, 'html.parser')
                for item in soup.select('.titulo a, h3 a, .noticia-item a')[:3]:
                    titulo = self._limpar(item.get_text())
                    if len(titulo) > 20:
                        noticias.append(f"• {titulo}")
            except Exception as e:
                # Best effort: one broken source must not abort the others,
                # but the failure is logged instead of silently swallowed.
                logger.warning("Erro ao raspar {}: {}", url, e)
                continue

        if not noticias:
            result = "Sem notícias recentes de Angola."
        else:
            result = "NOTÍCIAS DE ANGOLA:\n" + "\n".join(noticias[:self._MAX_NOTICIAS])

        self.cache.set(key, result)
        return result

    def _busca_geral(self, query: str) -> str:
        """Search the web for ``query`` through the Serper API.

        Up to five organic results are formatted as ``• title: snippet``.
        Successful lookups (including "nothing found") are cached per
        lowercased query; configuration and transport errors are not cached,
        so a transient failure does not block retries.

        Args:
            query: Free-text search query.

        Returns:
            The formatted results, or a short error/empty message.
        """
        key = f"geral_{query.lower()}"
        cached = self.cache.get(key)
        if cached:
            return cached

        # getattr keeps a missing config attribute from raising AttributeError.
        api_key = getattr(config, "SERPER_API_KEY", None)
        if not api_key:
            return "Busca geral não configurada. Configure SERPER_API_KEY no HF Space Secrets."

        try:
            url = "https://google.serper.dev/search"
            payload = {"q": query}
            headers = {"X-API-KEY": api_key}
            # Reuse the shared session (connection pooling, same headers).
            r = self.session.post(url, json=payload, headers=headers, timeout=10)

            if r.status_code != 200:
                logger.warning("Serper HTTP {}", r.status_code)
                return "Erro na API de busca geral."

            data = r.json()
            linhas = []
            for item in (data.get('organic') or [])[:5]:
                title = (item.get('title') or '')[:100]
                snippet = (item.get('snippet') or '')[:150]
                if title:
                    linhas.append(f"• {title}: {snippet}")
        except Exception as e:
            logger.error(f"Erro Serper: {e}")
            # Returned without caching: the next call should retry.
            return "Erro na busca geral."

        if not linhas:
            result = "Nada encontrado na busca geral."
        else:
            result = "INFORMAÇÕES:\n" + "\n".join(linhas)

        self.cache.set(key, result)
        return result

    def pesquisar(self, mensagem: str) -> str:
        """Decide from the message text whether a web lookup is needed.

        Angola-related keywords take precedence and trigger the news scraper;
        otherwise question words trigger a general search. Matching is plain
        substring matching on the lowercased message.

        Args:
            mensagem: The user's message.

        Returns:
            The lookup result, or an empty string when no keyword matches.
        """
        texto = mensagem.lower()
        # Angola-related topics always go to the news scraper.
        if any(w in texto for w in self._ANGOLA_KEYWORDS):
            return self._scraping_angola()
        # General-knowledge questions go to the search API.
        if any(w in texto for w in self._QUESTION_KEYWORDS):
            return self._busca_geral(mensagem)
        return ""