# web_search.py — V27 — SERPER API + ANGOLA SCRAPING
"""Web search helpers: Angola news scraping plus general search via the Serper API."""

import os
import re
import time
from typing import Dict, List

import requests
from bs4 import BeautifulSoup
from loguru import logger

import config


class SimpleCache:
    """Tiny in-memory TTL cache mapping keys to (value, timestamp) pairs."""

    def __init__(self, ttl: int = 900):
        # ttl: time-to-live in seconds (default 15 minutes).
        self.ttl = ttl
        self._data: Dict[str, tuple] = {}

    def get(self, key):
        """Return the cached value for *key*, or None if absent or expired."""
        if key in self._data and time.time() - self._data[key][1] < self.ttl:
            return self._data[key][0]
        return None

    def set(self, key, value):
        """Store *value* under *key*, stamped with the current time."""
        self._data[key] = (value, time.time())


class WebSearch:
    """Fetches Angolan headlines by scraping and general facts via the Serper API."""

    def __init__(self):
        self.cache = SimpleCache()
        self.session = requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
        })
        # News sites scraped by _scraping_angola for Angolan headlines.
        self.fontes_angola = [
            "https://www.angop.ao/ultimas",
            "https://www.novojornal.co.ao/",
            "https://www.jornaldeangola.ao/",
            "https://www.verangola.net/va/noticias"
        ]

    def _limpar(self, texto: str) -> str:
        """Collapse runs of whitespace, strip, and truncate to 200 characters."""
        return re.sub(r'\s+', ' ', texto).strip()[:200]

    def _scraping_angola(self) -> str:
        """Scrape the configured Angolan news sites and return a headline digest.

        Results are cached for the SimpleCache TTL. Scraping is best-effort:
        a failing source is logged and skipped, never fatal.
        """
        key = "noticias_angola"
        cached = self.cache.get(key)
        if cached:
            return cached
        noticias: List[str] = []
        for url in self.fontes_angola:
            try:
                r = self.session.get(url, timeout=8)
                if r.status_code != 200:
                    continue
                soup = BeautifulSoup(r.text, 'html.parser')
                # Up to 3 headlines per site; titles of <= 20 chars are
                # assumed to be navigation junk and skipped.
                for item in soup.select('.titulo a, h3 a, .noticia-item a')[:3]:
                    titulo = self._limpar(item.get_text())
                    if titulo and len(titulo) > 20:
                        noticias.append(f"• {titulo}")
            except Exception as e:
                # Was a silent bare `except:` — log so dead sources are visible.
                logger.warning(f"Scraping failed for {url}: {e}")
                continue
        if not noticias:
            result = "Sem notícias recentes de Angola."
        else:
            result = "NOTÍCIAS DE ANGOLA:\n" + "\n".join(noticias[:5])
        self.cache.set(key, result)
        return result

    def _busca_geral(self, query: str) -> str:
        """Query the Serper (Google) search API and return a formatted digest.

        Returns a configuration hint when SERPER_API_KEY is unset; successful
        and failed lookups (except HTTP errors) are cached under the query.
        """
        key = f"geral_{query.lower()}"
        cached = self.cache.get(key)
        if cached:
            return cached
        if not config.SERPER_API_KEY:
            return "Busca geral não configurada. Configure SERPER_API_KEY no HF Space Secrets."
        try:
            url = "https://google.serper.dev/search"
            payload = {"q": query}
            headers = {"X-API-KEY": config.SERPER_API_KEY}
            r = requests.post(url, json=payload, headers=headers, timeout=10)
            if r.status_code != 200:
                # Intentionally NOT cached, so a transient API error does not
                # poison the cache for the whole TTL window.
                return "Erro na API de busca geral."
            data = r.json()
            linhas: List[str] = []
            # Loop variable renamed from `result`, which shadowed the outer
            # `result` accumulator below.
            for hit in data.get('organic', [])[:5]:
                title = hit.get('title', '')[:100]
                snippet = hit.get('snippet', '')[:150]
                if title:
                    linhas.append(f"• {title}: {snippet}")
            if not linhas:
                result = "Nada encontrado na busca geral."
            else:
                result = "INFORMAÇÕES:\n" + "\n".join(linhas)
        except Exception as e:
            logger.error(f"Erro Serper: {e}")
            result = "Erro na busca geral."
        self.cache.set(key, result)
        return result

    def pesquisar(self, mensagem: str) -> str:
        """Akira decides on her own whether to search (no keywords in the prompt).

        Returns the Angola news digest for Angola-related messages, a Serper
        digest for general-knowledge questions, or '' when no lookup is needed.
        """
        texto = mensagem.lower()
        # Angola-related topics always use the local news scraper.
        if any(w in texto for w in ["angola", "luanda", "notícia", "jornal", "governo", "presidente"]):
            return self._scraping_angola()
        # Portuguese question words trigger the general web search.
        if any(w in texto for w in ["quem é", "o que é", "quando", "onde", "como", "por que", "quanto", "qual"]):
            return self._busca_geral(mensagem)
        return ""