# GitHub Actions
# Auto-deploy from GitHub
# b5ef2bb
"""
Response caching module for the RAG pipeline.
Uses a TTL (time-to-live) cache to store answers to frequently repeated queries.
"""
from typing import Optional, Dict, Any
from cachetools import TTLCache
import hashlib
import logging
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Cache configuration
# In-memory, module-level cache used by the functions in this module.
# Max 1000 items; entries expire after 1 hour (ttl is given in seconds: 3600).
_response_cache = TTLCache(maxsize=1000, ttl=3600)
def _normalize_query(query: str) -> str:
"""
Normalize query for cache key generation.
- Lowercase
- Strip whitespace
- Remove common punctuation
"""
import string
return query.lower().strip().translate(str.maketrans("", "", string.punctuation))
def get_cached_response(query: str) -> Optional[Dict[str, Any]]:
    """
    Get cached response for a query.

    The query is normalized with ``_normalize_query`` before the lookup, so
    queries that normalize to the same key share one cache entry.

    Args:
        query: User question

    Returns:
        Cached response dict, or None if the cache has no entry for the
        query. The returned dict is the object held by the cache, not a copy.
    """
    key = _normalize_query(query)
    try:
        # One lookup instead of `in` followed by indexing: an entry can
        # expire between the two steps, which would surface as a KeyError.
        cached = _response_cache[key]
    except KeyError:
        return None
    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info("Cache hit for query: '%s'", query)
    return cached
def cache_response(query: str, response: Dict[str, Any]):
    """
    Store a response in the cache under the normalized query.

    Nothing is stored when the response is empty, has no answer, or is a
    low-confidence "not enough information" fallback.

    Args:
        query: User question
        response: Response dictionary to cache
    """
    # Skip missing/empty responses and responses without an answer.
    answer = response.get('answer') if response else None
    if not answer:
        return

    # Policy: low-confidence fallback answers are not worth caching.
    low_confidence = response.get('confidence') == 'low'
    if low_confidence and "don't have enough information" in answer:
        return

    _response_cache[_normalize_query(query)] = response
    logger.debug(f"Cached response for: '{query}'")
def clear_cache() -> None:
    """
    Clear all cached responses.

    Empties the module-level response cache, then logs the event at INFO
    level.
    """
    _response_cache.clear()
    logger.info("Cache cleared")