"""In-process TTL caching for search and chat responses, with hit/miss counters."""
import json
from threading import Lock
from typing import Any, Dict, Hashable, Optional, Tuple
from cachetools import TTLCache
from app.core.config import get_settings
from app.core.logging import get_logger
logger = get_logger(__name__)
_settings = get_settings()
# Global kill-switch; defaults to True when the settings object lacks the attribute.
_CACHE_ENABLED: bool = getattr(_settings, "CACHE_ENABLED", True)
# TTLs are intentionally short and in-code defaults; no env required.
_SEARCH_TTL_SECONDS = 60
_CHAT_TTL_SECONDS = 60
# Bounded caches: entries expire after the TTL and the size is capped at maxsize.
_search_cache: TTLCache = TTLCache(maxsize=1024, ttl=_SEARCH_TTL_SECONDS)
_chat_cache: TTLCache = TTLCache(maxsize=512, ttl=_CHAT_TTL_SECONDS)
# A single lock guards both caches and all four counters below.
_lock = Lock()
# Process-local hit/miss counters, exposed via get_cache_stats().
_search_hits: int = 0
_search_misses: int = 0
_chat_hits: int = 0
_chat_misses: int = 0
def cache_enabled() -> bool:
    """Report whether the in-process response caches are active."""
    return _CACHE_ENABLED
def _make_search_key(
namespace: str,
query: str,
top_k: int,
filters: Optional[Dict[str, Any]],
) -> Hashable:
filters_json = (
json.dumps(filters, sort_keys=True, separators=(",", ":"))
if filters is not None
else ""
)
return (namespace, query, int(top_k), filters_json)
def _make_chat_key(
namespace: str,
query: str,
top_k: int,
min_score: float,
use_web_fallback: bool,
) -> Hashable:
return (namespace, query, int(top_k), float(min_score), bool(use_web_fallback))
def get_search_cached(
    namespace: str,
    query: str,
    top_k: int,
    filters: Optional[Dict[str, Any]],
) -> Optional[Any]:
    """Return cached search results or None.

    Args:
        namespace: Index namespace the query ran against.
        query: Raw query string.
        top_k: Number of results requested.
        filters: Optional metadata filters; canonicalized into the key.

    Returns:
        The cached value on a hit, otherwise None. Always None when
        caching is globally disabled.
    """
    global _search_hits, _search_misses
    if not _CACHE_ENABLED:
        return None
    key = _make_search_key(namespace, query, top_k, filters)
    with _lock:
        # Single EAFP lookup: the original contains-then-getitem pair could
        # race with TTL expiry between the two calls even while holding the
        # lock, raising KeyError on the second access.
        try:
            value = _search_cache[key]
        except KeyError:
            _search_misses += 1
            logger.info(
                "Search cache miss namespace='%s' query='%s' top_k=%d",
                namespace,
                query,
                top_k,
            )
            return None
        _search_hits += 1
        logger.info(
            "Search cache hit namespace='%s' query='%s' top_k=%d",
            namespace,
            query,
            top_k,
        )
        return value
def set_search_cached(
    namespace: str,
    query: str,
    top_k: int,
    filters: Optional[Dict[str, Any]],
    value: Any,
) -> None:
    """Store a search result under its canonical key; no-op when disabled."""
    if not _CACHE_ENABLED:
        return
    entry_key = _make_search_key(namespace, query, top_k, filters)
    with _lock:
        _search_cache[entry_key] = value
def get_chat_cached(
    namespace: str,
    query: str,
    top_k: int,
    min_score: float,
    use_web_fallback: bool,
) -> Optional[Any]:
    """Return cached chat response or None.

    Only used when chat_history is empty.

    Args:
        namespace: Index namespace the chat ran against.
        query: Raw user query.
        top_k: Number of retrieval results requested.
        min_score: Minimum relevance score threshold.
        use_web_fallback: Whether web fallback was enabled for the request.

    Returns:
        The cached value on a hit, otherwise None. Always None when
        caching is globally disabled.
    """
    global _chat_hits, _chat_misses
    if not _CACHE_ENABLED:
        return None
    key = _make_chat_key(namespace, query, top_k, min_score, use_web_fallback)
    with _lock:
        # Single EAFP lookup: the original contains-then-getitem pair could
        # race with TTL expiry between the two calls even while holding the
        # lock, raising KeyError on the second access.
        try:
            value = _chat_cache[key]
        except KeyError:
            _chat_misses += 1
            logger.info(
                "Chat cache miss namespace='%s' query='%s' top_k=%d",
                namespace,
                query,
                top_k,
            )
            return None
        _chat_hits += 1
        logger.info(
            "Chat cache hit namespace='%s' query='%s' top_k=%d",
            namespace,
            query,
            top_k,
        )
        return value
def set_chat_cached(
    namespace: str,
    query: str,
    top_k: int,
    min_score: float,
    use_web_fallback: bool,
    value: Any,
) -> None:
    """Store a chat response under its canonical key; no-op when disabled."""
    if not _CACHE_ENABLED:
        return
    entry_key = _make_chat_key(namespace, query, top_k, min_score, use_web_fallback)
    with _lock:
        _chat_cache[entry_key] = value
def get_cache_stats() -> Dict[str, int]:
    """Return a consistent snapshot of cache hit/miss counters.

    The lock is held while reading so the four counters are mutually
    consistent — no hit/miss increment can land between the reads.
    (Also removes a stray ' |' token that had been fused onto the
    closing brace, which was a syntax error.)
    """
    with _lock:
        return {
            "search_hits": _search_hits,
            "search_misses": _search_misses,
            "chat_hits": _chat_hits,
            "chat_misses": _chat_misses,
        }