"""Бесплатный интернет-агент с минимум 3 способами поиска""" import re import logging import hashlib from datetime import datetime, timedelta from typing import Dict, List, Tuple, Any, Optional from collections import Counter import requests from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry class FreeInternetAgent: """Интернет-агент с бесплатными поисковыми системами""" def __init__(self, cache_ttl: int = 3600): self.session = requests.Session() retry = Retry(total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504]) self.session.mount("https://", HTTPAdapter(max_retries=retry)) self.session.headers.update({ 'User-Agent': 'PinkSky/7.0 (Linux; Research)' }) self.cache: Dict[str, Tuple[Any, datetime]] = {} self.cache_ttl = cache_ttl self.logger = logging.getLogger(__name__) self._has_bs4 = False try: import bs4 self._has_bs4 = True except ImportError: pass # SearXNG инстансы self.searxng_instances = [ "https://searx.be", "https://search.bus-hit.me", "https://searx.nixnet.xyz", "https://searx.tuxcloud.net", "https://searx.moe", ] # Асинхронная проверка доступности при старте self.healthy_searxng = self._check_searxng_health() def _check_searxng_health(self) -> List[str]: healthy = [] for instance in self.searxng_instances: try: response = self.session.get(f"{instance}/health", timeout=5) if response.status_code == 200: healthy.append(instance) except Exception as e: self.logger.warning(f"SearXNG health check failed for {instance}: {e}") self.logger.info(f"Healthy SearXNG instances: {len(healthy)}/{len(self.searxng_instances)}") return healthy def _get_cache_key(self, *args, **kwargs) -> str: key = f"{args}_{sorted(kwargs.items())}" return hashlib.md5(key.encode()).hexdigest() def _get_from_cache(self, key: str) -> Optional[Any]: if key in self.cache: data, timestamp = self.cache[key] if datetime.now() - timestamp < timedelta(seconds=self.cache_ttl): return data else: del self.cache[key] return None def _save_to_cache(self, key: str, data: Any) -> None: self.cache[key] = (data, datetime.now()) def search_web(self, query: str, num_results: int = 5) -> List[Dict[str, str]]: cache_key = self._get_cache_key('search', query, num_results) cached = self._get_from_cache(cache_key) if cached is not None: return cached results = [] # Способ 1: SearXNG (мета-поиск) results = self._search_searxng(query, num_results) # Способ 2: DuckDuckGo API if not results: results = self._search_duckduckgo(query, num_results) # Способ 3: Google if not results: results = self._search_google(query, num_results) # Способ 4: Яндекс if not results and any(ord(c) > 1024 for c in query): results = self._search_yandex(query, num_results) self._save_to_cache(cache_key, results) return results def _search_searxng(self, query: str, num_results: int) -> List[Dict[str, str]]: if not self.healthy_searxng: self.healthy_searxng = self._check_searxng_health() if not self.healthy_searxng: self.logger.warning("No healthy SearXNG instances available") return [] results = [] for instance in self.healthy_searxng: try: url = f"{instance}/search" params = { "q": query, "format": "json", "categories": "general", "engines": "google,bing,duckduckgo,startpage", "language": "en", "pageno": 1 } response = self.session.get(url, params=params, timeout=20) response.raise_for_status() data = response.json() if 'results' in data: for item in data['results'][:num_results]: results.append({ 'title': item.get('title', '')[:100], 'url': item.get('url', ''), 'snippet': item.get('content', '')[:200], 'source': 'searxng', 'engine': item.get('engine', '') }) self.logger.info(f"SearXNG ({instance}): {len(results)} results") break except Exception as e: self.logger.warning(f"SearXNG error for {instance}: {e}") self.healthy_searxng.remove(instance) return results # Удалить эту строку: # INTERNET_AGENT = FreeInternetAgent(cache_ttl=3600) # Вместо неё добавляем функцию-фабрику: def get_internet_agent() -> FreeInternetAgent: """Lazy-инициализация интернет-агента с глобальным состоянием.""" if not hasattr(get_internet_agent, 'instance'): get_internet_agent.instance = FreeInternetAgent(cache_ttl=3600) return get_internet_agent.instance