Spaces:
Runtime error
Runtime error
| """Бесплатный интернет-агент с минимум 3 способами поиска""" | |
| import re | |
| import logging | |
| import hashlib | |
| from datetime import datetime, timedelta | |
| from typing import Dict, List, Tuple, Any, Optional | |
| from collections import Counter | |
| import requests | |
| from requests.adapters import HTTPAdapter | |
| from urllib3.util.retry import Retry | |
class FreeInternetAgent:
    """Web-search agent that relies only on free search backends.

    Search results are plain dictionaries and are memoised in an in-memory,
    per-instance cache for ``cache_ttl`` seconds.

    Note:
        Constructing an instance performs blocking HTTP requests, because the
        configured SearXNG instances are health-checked in ``__init__``.
    """

    # Paths probed by the health check, in order. SearXNG serves ``/healthz``;
    # ``/health`` is kept as a fallback so that anything that answered the
    # original probe is still considered healthy.
    _HEALTH_PATHS = ("/healthz", "/health")

    def __init__(self, cache_ttl: int = 3600):
        """Create the HTTP session, the result cache and probe SearXNG.

        Args:
            cache_ttl: Lifetime of a cached search result, in seconds.
        """
        self.session = requests.Session()
        # Retry transient server-side failures with exponential backoff.
        retry = Retry(total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
        self.session.mount("https://", HTTPAdapter(max_retries=retry))
        self.session.headers.update({
            'User-Agent': 'PinkSky/7.0 (Linux; Research)'
        })

        # cache key -> (payload, time the payload was stored)
        self.cache: Dict[str, Tuple[Any, datetime]] = {}
        self.cache_ttl = cache_ttl
        self.logger = logging.getLogger(__name__)

        # Record whether BeautifulSoup is importable; it is an optional extra.
        self._has_bs4 = False
        try:
            import bs4  # noqa: F401  (availability probe only)
            self._has_bs4 = True
        except ImportError:
            pass

        # Public SearXNG instances to query.
        self.searxng_instances = [
            "https://searx.be",
            "https://search.bus-hit.me",
            "https://searx.nixnet.xyz",
            "https://searx.tuxcloud.net",
            "https://searx.moe",
        ]
        # Synchronous availability probe at start-up: this blocks until every
        # instance has answered, failed or timed out.
        self.healthy_searxng = self._check_searxng_health()

    def _check_searxng_health(self) -> List[str]:
        """Return the configured SearXNG instances that answer a health probe.

        An instance counts as healthy when any path in ``_HEALTH_PATHS``
        responds with HTTP 200. Request errors are logged and the instance is
        treated as unhealthy; nothing is raised.
        """
        healthy = []
        for instance in self.searxng_instances:
            try:
                # ``any`` short-circuits, so the fallback path is requested
                # only when the first one did not return 200.
                if any(
                    self.session.get(f"{instance}{path}", timeout=5).status_code == 200
                    for path in self._HEALTH_PATHS
                ):
                    healthy.append(instance)
            except Exception as e:  # best-effort probe of third-party hosts
                self.logger.warning("SearXNG health check failed for %s: %s", instance, e)
        self.logger.info(
            "Healthy SearXNG instances: %d/%d", len(healthy), len(self.searxng_instances)
        )
        return healthy

    def _get_cache_key(self, *args, **kwargs) -> str:
        """Build a cache key from the repr of the positional and keyword arguments."""
        key = f"{args}_{sorted(kwargs.items())}"
        # MD5 is used purely as a compact fingerprint, not for security.
        return hashlib.md5(key.encode()).hexdigest()

    def _get_from_cache(self, key: str) -> Optional[Any]:
        """Return the cached payload for ``key``, or ``None`` if absent or expired.

        Expired entries are evicted as a side effect.
        """
        entry = self.cache.get(key)
        if entry is None:
            return None
        data, timestamp = entry
        if datetime.now() - timestamp < timedelta(seconds=self.cache_ttl):
            return data
        del self.cache[key]
        return None

    def _save_to_cache(self, key: str, data: Any) -> None:
        """Store ``data`` under ``key`` together with the current time."""
        self.cache[key] = (data, datetime.now())

    def search_web(self, query: str, num_results: int = 5) -> List[Dict[str, str]]:
        """Search the web, trying each backend in turn until one yields results.

        Order: SearXNG, DuckDuckGo, Google and, only when the query contains a
        character with a code point above 1024, Yandex. A backend whose method
        is not defined on this object is skipped with a warning instead of
        raising ``AttributeError``.

        Args:
            query: Search string.
            num_results: Maximum number of results requested from a backend.

        Returns:
            The results of the first backend that returned any, otherwise the
            (empty) result of the last backend tried. Only non-empty results
            are cached, so a transient outage is not remembered for
            ``cache_ttl`` seconds.
        """
        cache_key = self._get_cache_key('search', query, num_results)
        cached = self._get_from_cache(cache_key)
        if cached is not None:
            return cached

        backend_names = ['_search_searxng', '_search_duckduckgo', '_search_google']
        if any(ord(c) > 1024 for c in query):
            backend_names.append('_search_yandex')

        results: List[Dict[str, str]] = []
        for name in backend_names:
            backend = getattr(self, name, None)
            if backend is None:
                self.logger.warning("Search backend %s is not implemented; skipping", name)
                continue
            results = backend(query, num_results)
            if results:
                break

        if results:
            self._save_to_cache(cache_key, results)
        return results

    def _search_searxng(self, query: str, num_results: int) -> List[Dict[str, str]]:
        """Query the healthy SearXNG instances and return normalised results.

        Instances are tried in order; the first one that answers successfully
        ends the search, even if it returned no hits. An instance whose
        request or response handling fails is logged and removed from
        ``self.healthy_searxng``. When no healthy instance is known, the
        health check is re-run once before giving up.

        Returns:
            Up to ``num_results`` dicts with the keys ``title``, ``url``,
            ``snippet``, ``source`` and ``engine``; an empty list when no
            instance produced a usable answer.
        """
        if not self.healthy_searxng:
            self.healthy_searxng = self._check_searxng_health()
            if not self.healthy_searxng:
                self.logger.warning("No healthy SearXNG instances available")
                return []

        params = {
            "q": query,
            "format": "json",
            "categories": "general",
            "engines": "google,bing,duckduckgo,startpage",
            "language": "en",
            "pageno": 1
        }

        results: List[Dict[str, str]] = []
        # Iterate over a snapshot: failing instances are removed from the live
        # list below, and mutating a list while iterating it skips elements.
        for instance in list(self.healthy_searxng):
            try:
                response = self.session.get(f"{instance}/search", params=params, timeout=20)
                response.raise_for_status()
                data = response.json()
                raw_items = data['results'][:num_results] if 'results' in data else []
                # Build into a local list so a failure half-way through does
                # not leak partial hits into the next instance's results.
                # ``or ''`` guards against JSON nulls, which ``.get(key, '')``
                # would pass through as ``None``.
                hits = [
                    {
                        'title': (item.get('title') or '')[:100],
                        'url': item.get('url') or '',
                        'snippet': (item.get('content') or '')[:200],
                        'source': 'searxng',
                        'engine': item.get('engine') or '',
                    }
                    for item in raw_items
                ]
            except Exception as e:  # best-effort: any failure disqualifies the instance
                self.logger.warning("SearXNG error for %s: %s", instance, e)
                self.healthy_searxng.remove(instance)
                continue

            results = hits
            self.logger.info("SearXNG (%s): %d results", instance, len(results))
            break
        return results
# Lazy factory used instead of an eager module-level
# ``INTERNET_AGENT = FreeInternetAgent(cache_ttl=3600)`` singleton.
def get_internet_agent() -> FreeInternetAgent:
    """Return the shared ``FreeInternetAgent``, creating it on first call.

    The instance is kept as the ``instance`` attribute of this function, so
    every later call returns the same object.
    """
    state = get_internet_agent.__dict__
    if 'instance' not in state:
        state['instance'] = FreeInternetAgent(cache_ttl=3600)
    return state['instance']