# Spaces: Running
# File size: 1,848 Bytes
# b5ef2bb
"""
Response caching module for RAG pipeline.
Uses TTL (Time-To-Live) cache to store answers for frequent queries.
"""
from typing import Optional, Dict, Any
from cachetools import TTLCache
import hashlib
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Cache configuration, spelled out as named values:
#   - hold at most 1000 items
#   - expire each item after 1 hour (3600 seconds)
_CACHE_MAX_ITEMS = 1000
_CACHE_TTL_SECONDS = 3600

_response_cache = TTLCache(maxsize=_CACHE_MAX_ITEMS, ttl=_CACHE_TTL_SECONDS)
def _normalize_query(query: str) -> str:
    """
    Normalize a query so that equivalent phrasings share one cache key.

    Steps, in order:
    - Lowercase
    - Remove ASCII punctuation (``string.punctuation``)
    - Collapse each run of whitespace to one space and trim both ends

    Punctuation is removed *before* whitespace is trimmed; otherwise a query
    such as "hello ?" keeps a dangling space ("hello ") and misses the entry
    cached for "hello?".

    Args:
        query: Raw user question

    Returns:
        Normalized string used as the cache key
    """
    # Function-local import, as in the original, so this helper does not
    # depend on the module's top-level import block.
    import string

    # NOTE(review): dropping punctuation entirely means queries that differ
    # only in punctuation (e.g. "c++" vs "c") share a key - confirm this is
    # acceptable for the cached domain.
    without_punctuation = query.lower().translate(
        str.maketrans("", "", string.punctuation)
    )
    # split() with no argument splits on any whitespace run and discards
    # leading/trailing whitespace, so the join yields single-spaced text.
    return " ".join(without_punctuation.split())
def get_cached_response(query: str) -> Optional[Dict[str, Any]]:
    """
    Get cached response for a query.

    The query is normalized with ``_normalize_query`` before the lookup, so
    queries that normalize to the same text return the same entry.

    Args:
        query: User question

    Returns:
        Cached response dict or None if not found. The dict is the same
        object that was stored in the cache, not a copy.
    """
    key = _normalize_query(query)
    # Single lookup instead of "key in cache" followed by "cache[key]": with a
    # TTL cache the entry can expire between those two calls, which would
    # surface as an unexpected KeyError to the caller.
    try:
        cached = _response_cache[key]
    except KeyError:
        return None
    # Lazy %-formatting: the message is only built if INFO is enabled.
    logger.info("Cache hit for query: '%s'", query)
    return cached
def cache_response(query: str, response: Dict[str, Any]):
    """
    Store a response in the cache under the normalized form of the query.

    Nothing is stored when:
    - the response is empty/None or has no truthy 'answer', or
    - the response has 'confidence' == 'low' and its answer contains the
      fallback phrase "don't have enough information".

    Args:
        query: User question
        response: Response dictionary to cache
    """
    # Guard: nothing to cache at all.
    if not response:
        return

    # Guard: only responses that actually carry an answer are worth caching.
    answer = response.get('answer')
    if not answer:
        return

    # Policy: skip low-confidence fallback answers so they are not replayed.
    low_confidence = response.get('confidence') == 'low'
    if low_confidence and "don't have enough information" in answer:
        return

    cache_key = _normalize_query(query)
    _response_cache[cache_key] = response
    logger.debug(f"Cached response for: '{query}'")
def clear_cache():
    """
    Drop every entry from the response cache.

    An info-level message is logged once the cache has been emptied.
    """
    _response_cache.clear()
    logger.info("Cache cleared")
|