Spaces:
Running
Running
| """ | |
| Response caching utility for PDF-Assistant-RAG. | |
| Supports two backends: | |
| - Redis (preferred, for production) | |
| - LRU in-memory cache (fallback for development or when Redis is unavailable) | |
| Cache key is a SHA-256 hash of (user_id, document_id, question) to ensure | |
| keys are short, stable, and unique across all user/question/document | |
| combinations β never shared between different users. | |
| """ | |
| import hashlib | |
| import json | |
| import logging | |
| import os | |
| from typing import Optional | |
| logger = logging.getLogger(__name__) | |
| # --------------------------------------------------------------------------- | |
| # Configuration β all values come from environment variables | |
| # --------------------------------------------------------------------------- | |
| CACHE_TTL: int = int(os.getenv("CACHE_TTL", "3600")) # seconds; default 1 hour | |
| REDIS_URL: Optional[str] = os.getenv("REDIS_URL", None) | |
| LRU_MAX_SIZE: int = int(os.getenv("CACHE_LRU_MAX_SIZE", "128")) | |
| # --------------------------------------------------------------------------- | |
| # Redis client (lazy init β only created when REDIS_URL is set) | |
| # --------------------------------------------------------------------------- | |
| _redis_client = None | |
| _redis_available = False | |
| def _get_redis(): | |
| """ | |
| Lazily initialise the Redis client. | |
| Returns the client if Redis is reachable, otherwise returns None. | |
| After one failure, stops retrying for the process lifetime. | |
| """ | |
| global _redis_client, _redis_available | |
| if _redis_client is not None: | |
| return _redis_client if _redis_available else None | |
| if not REDIS_URL: | |
| logger.info("REDIS_URL not set β using LRU in-memory cache.") | |
| _redis_available = False | |
| return None | |
| try: | |
| import redis # soft import β redis package is optional | |
| client = redis.from_url( | |
| REDIS_URL, | |
| decode_responses=True, | |
| socket_connect_timeout=2, | |
| ) | |
| client.ping() | |
| _redis_client = client | |
| _redis_available = True | |
| logger.info("Redis cache connected at %s", REDIS_URL) | |
| return _redis_client | |
| except Exception as exc: # noqa: BLE001 | |
| logger.warning("Redis unavailable (%s) β falling back to LRU cache.", exc) | |
| _redis_available = False | |
| return None | |
| # --------------------------------------------------------------------------- | |
| # LRU in-memory fallback | |
| # --------------------------------------------------------------------------- | |
| _lru_store: dict = {} | |
| _lru_order: list = [] | |
| def _lru_get(key: str) -> Optional[str]: | |
| return _lru_store.get(key) | |
| def _lru_set(key: str, value: str) -> None: | |
| if key in _lru_store: | |
| _lru_order.remove(key) | |
| elif len(_lru_store) >= LRU_MAX_SIZE: | |
| oldest = _lru_order.pop(0) | |
| del _lru_store[oldest] | |
| _lru_store[key] = value | |
| _lru_order.append(key) | |
| def _lru_delete(key: str) -> None: | |
| if key in _lru_store: | |
| del _lru_store[key] | |
| _lru_order.remove(key) | |
| # --------------------------------------------------------------------------- | |
| # Public API β these are the only functions chat.py needs | |
| # --------------------------------------------------------------------------- | |
| def make_cache_key(user_id: str, document_id: str, question: str) -> str: | |
| """ | |
| Generate a stable, short cache key from user_id + document_id + question. | |
| SHA-256 gives us a 64-char hex string that is: | |
| - Always the same length regardless of question length | |
| - Unique per (user_id, document_id, question) triple β user_id is | |
| required so two different users asking the identical question never | |
| collide on the same cache entry, even when document_id is empty | |
| (cross-document queries against a user's own private knowledge base) | |
| - Safe for Redis keys and dict keys | |
| """ | |
| raw = f"{user_id}:{document_id}:{question.strip().lower()}" | |
| return hashlib.sha256(raw.encode("utf-8")).hexdigest() | |
| def get_cached_response(user_id: str, document_id: str, question: str) -> Optional[str]: | |
| """ | |
| Look up a cached answer for a (user_id, document_id, question) triple. | |
| Returns the answer string on hit, None on miss. | |
| """ | |
| key = make_cache_key(user_id, document_id, question) | |
| r = _get_redis() | |
| if r is not None: | |
| try: | |
| value = r.get(key) | |
| if value: | |
| logger.debug("Cache HIT (Redis) for key %s", key[:12]) | |
| return json.loads(value) | |
| except Exception as exc: # noqa: BLE001 | |
| logger.warning("Redis GET failed (%s) β checking LRU.", exc) | |
| value = _lru_get(key) | |
| if value: | |
| logger.debug("Cache HIT (LRU) for key %s", key[:12]) | |
| return json.loads(value) | |
| logger.debug("Cache MISS for key %s", key[:12]) | |
| return None | |
| def set_cached_response(user_id: str, document_id: str, question: str, answer: str) -> None: | |
| """ | |
| Store an answer. Tries Redis first; falls back to LRU. | |
| TTL is controlled by the CACHE_TTL environment variable. | |
| """ | |
| key = make_cache_key(user_id, document_id, question) | |
| serialised = json.dumps(answer) | |
| r = _get_redis() | |
| if r is not None: | |
| try: | |
| r.setex(key, CACHE_TTL, serialised) | |
| logger.debug("Cache SET (Redis) key %s TTL %ds", key[:12], CACHE_TTL) | |
| return | |
| except Exception as exc: # noqa: BLE001 | |
| logger.warning("Redis SET failed (%s) β storing in LRU.", exc) | |
| _lru_set(key, serialised) | |
| logger.debug("Cache SET (LRU) key %s", key[:12]) | |
| def invalidate_cache(user_id: str, document_id: str, question: str) -> None: | |
| """Remove one cache entry β useful when a document is re-indexed.""" | |
| key = make_cache_key(user_id, document_id, question) | |
| r = _get_redis() | |
| if r is not None: | |
| try: | |
| r.delete(key) | |
| except Exception: # noqa: BLE001 | |
| pass | |
| _lru_delete(key) | |