PDF-Assit_RAG / backend /app /cache.py
Param20h's picture
deploy: pure backend API with keywords fix
7c46845 unverified
Raw
History Blame Contribute Delete
5.87 kB
"""
Response caching utility for PDF-Assistant-RAG.
Supports two backends:
- Redis (preferred, for production)
- LRU in-memory cache (fallback for development or when Redis is unavailable)
Cache key is a SHA-256 hash of (user_id, document_id, question) to ensure
keys are short, stable, and unique across all user/question/document
combinations β€” never shared between different users.
"""
import hashlib
import json
import logging
import os
from typing import Optional
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Configuration β€” all values come from environment variables
# ---------------------------------------------------------------------------
CACHE_TTL: int = int(os.getenv("CACHE_TTL", "3600")) # seconds; default 1 hour
REDIS_URL: Optional[str] = os.getenv("REDIS_URL", None)
LRU_MAX_SIZE: int = int(os.getenv("CACHE_LRU_MAX_SIZE", "128"))
# ---------------------------------------------------------------------------
# Redis client (lazy init β€” only created when REDIS_URL is set)
# ---------------------------------------------------------------------------
_redis_client = None
_redis_available = False
def _get_redis():
"""
Lazily initialise the Redis client.
Returns the client if Redis is reachable, otherwise returns None.
After one failure, stops retrying for the process lifetime.
"""
global _redis_client, _redis_available
if _redis_client is not None:
return _redis_client if _redis_available else None
if not REDIS_URL:
logger.info("REDIS_URL not set β€” using LRU in-memory cache.")
_redis_available = False
return None
try:
import redis # soft import β€” redis package is optional
client = redis.from_url(
REDIS_URL,
decode_responses=True,
socket_connect_timeout=2,
)
client.ping()
_redis_client = client
_redis_available = True
logger.info("Redis cache connected at %s", REDIS_URL)
return _redis_client
except Exception as exc: # noqa: BLE001
logger.warning("Redis unavailable (%s) β€” falling back to LRU cache.", exc)
_redis_available = False
return None
# ---------------------------------------------------------------------------
# LRU in-memory fallback
# ---------------------------------------------------------------------------
_lru_store: dict = {}
_lru_order: list = []
def _lru_get(key: str) -> Optional[str]:
return _lru_store.get(key)
def _lru_set(key: str, value: str) -> None:
if key in _lru_store:
_lru_order.remove(key)
elif len(_lru_store) >= LRU_MAX_SIZE:
oldest = _lru_order.pop(0)
del _lru_store[oldest]
_lru_store[key] = value
_lru_order.append(key)
def _lru_delete(key: str) -> None:
if key in _lru_store:
del _lru_store[key]
_lru_order.remove(key)
# ---------------------------------------------------------------------------
# Public API β€” these are the only functions chat.py needs
# ---------------------------------------------------------------------------
def make_cache_key(user_id: str, document_id: str, question: str) -> str:
"""
Generate a stable, short cache key from user_id + document_id + question.
SHA-256 gives us a 64-char hex string that is:
- Always the same length regardless of question length
- Unique per (user_id, document_id, question) triple β€” user_id is
required so two different users asking the identical question never
collide on the same cache entry, even when document_id is empty
(cross-document queries against a user's own private knowledge base)
- Safe for Redis keys and dict keys
"""
raw = f"{user_id}:{document_id}:{question.strip().lower()}"
return hashlib.sha256(raw.encode("utf-8")).hexdigest()
def get_cached_response(user_id: str, document_id: str, question: str) -> Optional[str]:
"""
Look up a cached answer for a (user_id, document_id, question) triple.
Returns the answer string on hit, None on miss.
"""
key = make_cache_key(user_id, document_id, question)
r = _get_redis()
if r is not None:
try:
value = r.get(key)
if value:
logger.debug("Cache HIT (Redis) for key %s", key[:12])
return json.loads(value)
except Exception as exc: # noqa: BLE001
logger.warning("Redis GET failed (%s) β€” checking LRU.", exc)
value = _lru_get(key)
if value:
logger.debug("Cache HIT (LRU) for key %s", key[:12])
return json.loads(value)
logger.debug("Cache MISS for key %s", key[:12])
return None
def set_cached_response(user_id: str, document_id: str, question: str, answer: str) -> None:
"""
Store an answer. Tries Redis first; falls back to LRU.
TTL is controlled by the CACHE_TTL environment variable.
"""
key = make_cache_key(user_id, document_id, question)
serialised = json.dumps(answer)
r = _get_redis()
if r is not None:
try:
r.setex(key, CACHE_TTL, serialised)
logger.debug("Cache SET (Redis) key %s TTL %ds", key[:12], CACHE_TTL)
return
except Exception as exc: # noqa: BLE001
logger.warning("Redis SET failed (%s) β€” storing in LRU.", exc)
_lru_set(key, serialised)
logger.debug("Cache SET (LRU) key %s", key[:12])
def invalidate_cache(user_id: str, document_id: str, question: str) -> None:
"""Remove one cache entry β€” useful when a document is re-indexed."""
key = make_cache_key(user_id, document_id, question)
r = _get_redis()
if r is not None:
try:
r.delete(key)
except Exception: # noqa: BLE001
pass
_lru_delete(key)