Commit
·
91e1308
1
Parent(s):
58b869c
refactor SearchProvider and its subclasses for improved readability and consistency
Browse files
app.py
CHANGED
|
@@ -270,56 +270,99 @@ class CacheManager:
|
|
| 270 |
def __contains__(self, key): return key in self._cache and (time.time()-self._timestamps.get(key,0)<self.ttl)
|
| 271 |
|
| 272 |
class SearchProvider(ABC):
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
@property @abstractmethod
|
| 279 |
-
def provider_name(self) -> str: pass
|
| 280 |
@abstractmethod
|
| 281 |
-
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
|
|
|
|
|
|
| 282 |
def search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
class GoogleProvider(SearchProvider):
|
| 293 |
-
@property
|
|
|
|
|
|
|
|
|
|
| 294 |
def __init__(self, config_dict: Dict):
|
| 295 |
super().__init__(config_dict)
|
| 296 |
self._api_key = self.provider_config.get("google_api_key")
|
| 297 |
self._cse_id = self.provider_config.get("google_cse_id")
|
| 298 |
self._timeout = self.provider_config.get("google_timeout", 8)
|
| 299 |
-
if self._api_key and self._cse_id:
|
| 300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
| 302 |
try:
|
| 303 |
-
params = {
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
| 312 |
class TavilyProvider(SearchProvider):
|
| 313 |
-
@property
|
|
|
|
|
|
|
|
|
|
| 314 |
def __init__(self, config_dict: Dict):
|
| 315 |
super().__init__(config_dict)
|
| 316 |
self._api_key = self.provider_config.get("tavily_api_key")
|
| 317 |
self._search_depth = self.provider_config.get("tavily_depth", "basic")
|
| 318 |
if self._api_key and TavilyClient:
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
|
|
|
| 323 |
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
| 324 |
if not self._enabled: return None
|
| 325 |
try:
|
|
@@ -330,7 +373,9 @@ class TavilyProvider(SearchProvider):
|
|
| 330 |
except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None
|
| 331 |
|
| 332 |
class DuckDuckGoProvider(SearchProvider):
|
| 333 |
-
@property
|
|
|
|
|
|
|
| 334 |
def __init__(self, config_dict: Dict):
|
| 335 |
super().__init__(config_dict)
|
| 336 |
if DDGS:
|
|
|
|
| 270 |
def __contains__(self, key): return key in self._cache and (time.time()-self._timestamps.get(key,0)<self.ttl)
|
| 271 |
|
| 272 |
class SearchProvider(ABC):
|
| 273 |
+
@property
|
| 274 |
+
@abstractmethod
|
| 275 |
+
def provider_name(self) -> str:
|
| 276 |
+
pass
|
| 277 |
+
|
|
|
|
|
|
|
| 278 |
@abstractmethod
|
| 279 |
+
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
| 280 |
+
pass
|
| 281 |
+
|
| 282 |
def search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
| 283 |
+
if not self._enabled:
|
| 284 |
+
gaia_logger.debug(f"[{self.provider_name}] Skip: Not enabled.")
|
| 285 |
+
return None
|
| 286 |
+
if self._quota_limit != float('inf') and self._quota_used >= self._quota_limit:
|
| 287 |
+
gaia_logger.warning(f"[{self.provider_name}] Skip: Quota ({self._quota_used}/{int(self._quota_limit)})")
|
| 288 |
+
return None
|
| 289 |
+
usage_str = ""
|
| 290 |
+
if self._quota_limit != float('inf'):
|
| 291 |
+
self._quota_used += 1
|
| 292 |
+
usage_str = f"({self._quota_used}/{int(self._quota_limit)}) "
|
| 293 |
+
gaia_logger.info(f"[{self.provider_name}] {usage_str}Search: '{query[:70]}...'")
|
| 294 |
+
return self._perform_search(query, max_results)
|
| 295 |
+
|
| 296 |
+
def available(self) -> bool:
|
| 297 |
+
return self._enabled
|
| 298 |
|
| 299 |
class GoogleProvider(SearchProvider):
|
| 300 |
+
@property
|
| 301 |
+
def provider_name(self) -> str:
|
| 302 |
+
return "Google"
|
| 303 |
+
|
| 304 |
def __init__(self, config_dict: Dict):
|
| 305 |
super().__init__(config_dict)
|
| 306 |
self._api_key = self.provider_config.get("google_api_key")
|
| 307 |
self._cse_id = self.provider_config.get("google_cse_id")
|
| 308 |
self._timeout = self.provider_config.get("google_timeout", 8)
|
| 309 |
+
if self._api_key and self._cse_id:
|
| 310 |
+
self._enabled = True
|
| 311 |
+
gaia_logger.info(f"β {self.provider_name} API configured.")
|
| 312 |
+
else:
|
| 313 |
+
self._enabled = False
|
| 314 |
+
gaia_logger.warning(f"β {self.provider_name} API key/CSE ID missing in RAG config.")
|
| 315 |
+
|
| 316 |
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
| 317 |
try:
|
| 318 |
+
params = {
|
| 319 |
+
'key': self._api_key,
|
| 320 |
+
'cx': self._cse_id,
|
| 321 |
+
'q': query,
|
| 322 |
+
'num': max_results,
|
| 323 |
+
'safe': 'active'
|
| 324 |
+
}
|
| 325 |
+
response = requests.get(
|
| 326 |
+
"https://www.googleapis.com/customsearch/v1",
|
| 327 |
+
params=params,
|
| 328 |
+
timeout=self._timeout
|
| 329 |
+
)
|
| 330 |
+
response.raise_for_status()
|
| 331 |
+
data = response.json()
|
| 332 |
+
items = data.get('items', [])
|
| 333 |
+
if not items:
|
| 334 |
+
gaia_logger.info(f"[{self.provider_name}] No results for '{query[:70]}'")
|
| 335 |
+
return []
|
| 336 |
+
return [{
|
| 337 |
+
'href': i.get('link'),
|
| 338 |
+
'title': i.get('title', ''),
|
| 339 |
+
'body': i.get('snippet', '')
|
| 340 |
+
} for i in items]
|
| 341 |
+
except requests.exceptions.Timeout:
|
| 342 |
+
gaia_logger.warning(f"[{self.provider_name}] Timeout: '{query[:70]}'")
|
| 343 |
+
return None
|
| 344 |
+
except requests.exceptions.RequestException as e:
|
| 345 |
+
gaia_logger.warning(f"[{self.provider_name}] RequestEx: '{query[:70]}': {e}")
|
| 346 |
+
return None
|
| 347 |
+
except Exception as e:
|
| 348 |
+
gaia_logger.error(f"[{self.provider_name}] Error: '{query[:70]}': {e}", exc_info=True)
|
| 349 |
+
return None
|
| 350 |
|
| 351 |
class TavilyProvider(SearchProvider):
|
| 352 |
+
@property
|
| 353 |
+
def provider_name(self) -> str:
|
| 354 |
+
return "Tavily"
|
| 355 |
+
|
| 356 |
def __init__(self, config_dict: Dict):
|
| 357 |
super().__init__(config_dict)
|
| 358 |
self._api_key = self.provider_config.get("tavily_api_key")
|
| 359 |
self._search_depth = self.provider_config.get("tavily_depth", "basic")
|
| 360 |
if self._api_key and TavilyClient:
|
| 361 |
+
self._enabled = True
|
| 362 |
+
gaia_logger.info(f"β {self.provider_name} API configured.")
|
| 363 |
+
else:
|
| 364 |
+
self._enabled = False
|
| 365 |
+
gaia_logger.warning(f"β {self.provider_name} API key missing or TavilyClient not available in config.")
|
| 366 |
def _perform_search(self, query: str, max_results: int) -> Optional[List[Dict[str, str]]]:
|
| 367 |
if not self._enabled: return None
|
| 368 |
try:
|
|
|
|
| 373 |
except Exception as e: gaia_logger.warning(f"[{self.provider_name}] Search fail: '{query[:70]}': {e}"); return None
|
| 374 |
|
| 375 |
class DuckDuckGoProvider(SearchProvider):
|
| 376 |
+
@property
|
| 377 |
+
def provider_name(self) -> str:
|
| 378 |
+
return "DuckDuckGo"
|
| 379 |
def __init__(self, config_dict: Dict):
|
| 380 |
super().__init__(config_dict)
|
| 381 |
if DDGS:
|