PinkSky / server /internet_agent.py
FreshPixels's picture
Update server/internet_agent.py
c64f4ef verified
Raw
History Blame Contribute Delete
5.76 kB
"""Бесплатный интернет-агент с минимум 3 способами поиска"""
import re
import logging
import hashlib
from datetime import datetime, timedelta
from typing import Dict, List, Tuple, Any, Optional
from collections import Counter
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
class FreeInternetAgent:
    """Web-search agent that queries free search backends.

    The agent keeps one ``requests`` session (with retries on 5xx responses),
    an in-memory result cache keyed by query, and a list of SearXNG instances
    that passed the most recent health check.
    """

    def __init__(self, cache_ttl: int = 3600):
        """Create the HTTP session, the cache and probe the SearXNG instances.

        Args:
            cache_ttl: Lifetime of a cached search result, in seconds.

        Note:
            The health check at the end runs synchronously, so constructing
            an agent performs one HTTP request per configured instance.
        """
        self.session = requests.Session()
        retry = Retry(total=3, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
        self.session.mount("https://", HTTPAdapter(max_retries=retry))
        self.session.headers.update({
            'User-Agent': 'PinkSky/7.0 (Linux; Research)'
        })

        # Maps cache key -> (payload, time the payload was stored).
        self.cache: Dict[str, Tuple[Any, datetime]] = {}
        self.cache_ttl = cache_ttl
        self.logger = logging.getLogger(__name__)

        # Record whether BeautifulSoup is importable; the import itself is
        # only an availability probe.
        self._has_bs4 = False
        try:
            import bs4  # noqa: F401
            self._has_bs4 = True
        except ImportError:
            pass

        # SearXNG instances
        self.searxng_instances = [
            "https://searx.be",
            "https://search.bus-hit.me",
            "https://searx.nixnet.xyz",
            "https://searx.tuxcloud.net",
            "https://searx.moe",
        ]
        # Blocking availability check at start-up (not asynchronous).
        self.healthy_searxng = self._check_searxng_health()

    def _check_searxng_health(self) -> List[str]:
        """Return the configured SearXNG instances that answer with HTTP 200.

        Any exception raised while probing an instance is logged and the
        instance is treated as unhealthy.
        """
        healthy = []
        for instance in self.searxng_instances:
            try:
                # NOTE(review): upstream SearXNG appears to expose its probe
                # at /healthz - confirm that /health is served by these hosts.
                response = self.session.get(f"{instance}/health", timeout=5)
                if response.status_code == 200:
                    healthy.append(instance)
            except Exception as e:
                self.logger.warning("SearXNG health check failed for %s: %s", instance, e)
        self.logger.info(
            "Healthy SearXNG instances: %s/%s", len(healthy), len(self.searxng_instances)
        )
        return healthy

    def _get_cache_key(self, *args, **kwargs) -> str:
        """Build a hex MD5 digest identifying the given call arguments.

        Keyword arguments are sorted so their order does not affect the key.
        The digest is used only as a dictionary key.
        """
        key = f"{args}_{sorted(kwargs.items())}"
        return hashlib.md5(key.encode()).hexdigest()

    def _get_from_cache(self, key: str) -> Optional[Any]:
        """Return the cached payload for *key*, or ``None`` if absent or expired.

        An expired entry is removed from the cache as a side effect.
        """
        entry = self.cache.get(key)
        if entry is None:
            return None
        data, timestamp = entry
        if datetime.now() - timestamp < timedelta(seconds=self.cache_ttl):
            return data
        del self.cache[key]
        return None

    def _save_to_cache(self, key: str, data: Any) -> None:
        """Store *data* under *key* together with the current time."""
        self.cache[key] = (data, datetime.now())

    def search_web(self, query: str, num_results: int = 5) -> List[Dict[str, str]]:
        """Search the web for *query*, trying backends in order until one answers.

        Order: SearXNG, DuckDuckGo, Google, and finally Yandex when the query
        contains a character with code point above 1024.

        Args:
            query: Search string.
            num_results: Maximum number of results requested from a backend.

        Returns:
            The first non-empty result list, or an empty result if every
            backend came back empty.
        """
        cache_key = self._get_cache_key('search', query, num_results)
        cached = self._get_from_cache(cache_key)
        if cached is not None:
            return cached

        # Method 1: SearXNG (meta-search)
        results = self._search_searxng(query, num_results)

        # Methods 2-4: fallbacks, consulted only while nothing has been found.
        fallbacks = ['_search_duckduckgo', '_search_google']
        if any(ord(c) > 1024 for c in query):
            fallbacks.append('_search_yandex')

        for name in fallbacks:
            if results:
                break
            # A backend that is not defined on this object is skipped instead
            # of aborting the whole search with AttributeError.
            backend = getattr(self, name, None)
            if backend is None:
                self.logger.debug("Search backend %s is not available; skipping", name)
                continue
            results = backend(query, num_results)

        # Do not cache an empty outcome: a transient outage would otherwise
        # hide results for this query until the entry expires.
        if results:
            self._save_to_cache(cache_key, results)
        return results

    def _search_searxng(self, query: str, num_results: int) -> List[Dict[str, str]]:
        """Query the healthy SearXNG instances and return normalized results.

        Instances are tried in order. One that raises (network error, bad
        status, malformed payload) is logged and dropped from
        ``self.healthy_searxng``; the first one whose JSON payload contains a
        ``results`` key ends the search.

        Returns:
            Up to *num_results* dicts with the keys ``title``, ``url``,
            ``snippet``, ``source`` and ``engine``; empty if no instance
            delivered a usable payload.
        """
        if not self.healthy_searxng:
            self.healthy_searxng = self._check_searxng_health()
        if not self.healthy_searxng:
            self.logger.warning("No healthy SearXNG instances available")
            return []

        # Identical for every instance, so build it once.
        params = {
            "q": query,
            "format": "json",
            "categories": "general",
            "engines": "google,bing,duckduckgo,startpage",
            "language": "en",
            "pageno": 1
        }

        results: List[Dict[str, str]] = []
        # Iterate over a snapshot: failed instances are removed from the live
        # list inside the loop, which would otherwise skip the next entry.
        for instance in list(self.healthy_searxng):
            try:
                response = self.session.get(f"{instance}/search", params=params, timeout=20)
                response.raise_for_status()
                data = response.json()
                if 'results' in data:
                    # Built per instance so a failure halfway through never
                    # leaks partial rows into the next instance's answer.
                    # ``or ''`` guards against JSON null in text fields.
                    found = [
                        {
                            'title': (item.get('title') or '')[:100],
                            'url': item.get('url', ''),
                            'snippet': (item.get('content') or '')[:200],
                            'source': 'searxng',
                            'engine': item.get('engine', '')
                        }
                        for item in data['results'][:num_results]
                    ]
                    results = found
                    self.logger.info("SearXNG (%s): %s results", instance, len(results))
                    break
            except Exception as e:
                self.logger.warning("SearXNG error for %s: %s", instance, e)
                self.healthy_searxng.remove(instance)
        return results
# Removed: the eager module-level instance
# INTERNET_AGENT = FreeInternetAgent(cache_ttl=3600)
# Replaced by a factory function:
def get_internet_agent() -> FreeInternetAgent:
    """Return the shared internet agent, creating it on the first call.

    The instance is stored as the ``instance`` attribute of this function and
    is built with ``cache_ttl=3600``; later calls return that same object.
    """
    try:
        return get_internet_agent.instance
    except AttributeError:
        # First call: nothing has been stored on the function yet.
        agent = FreeInternetAgent(cache_ttl=3600)
        get_internet_agent.instance = agent
        return agent