Spaces:
Running
Running
| # DEPENDENCIES | |
| import time | |
| import pickle | |
| from typing import Any | |
| from typing import Dict | |
| from typing import Optional | |
| from pathlib import Path | |
| from collections import OrderedDict | |
| from config.settings import get_settings | |
| from config.logging_config import get_logger | |
| # Setup Settings and Logging | |
| settings = get_settings() | |
| logger = get_logger(__name__) | |
| class LRUCache: | |
| """ | |
| Least Recently Used cache with TTL support | |
| """ | |
| def __init__(self, max_size: int = 1000, ttl: int = 3600): | |
| self.max_size = max_size | |
| self.ttl = ttl | |
| self.cache = OrderedDict() | |
| self.timestamps = {} | |
| def get(self, key: str) -> Optional[Any]: | |
| """ | |
| Get item from cache | |
| """ | |
| if key not in self.cache: | |
| return None | |
| # Check TTL | |
| if self._is_expired(key): | |
| self.delete(key) | |
| return None | |
| # Move to end (most recently used) | |
| self.cache.move_to_end(key) | |
| return self.cache[key] | |
| def set(self, key: str, value: Any) -> None: | |
| """ | |
| Set item in cache | |
| """ | |
| # Remove if exists | |
| if key in self.cache: | |
| self.cache.pop(key) | |
| # Evict if needed | |
| elif (len(self.cache) >= self.max_size): | |
| oldest_key = next(iter(self.cache)) | |
| self.delete(oldest_key) | |
| # Add new item | |
| self.cache[key] = value | |
| self.timestamps[key] = time.time() | |
| def delete(self, key: str) -> bool: | |
| """ | |
| Delete item from cache | |
| """ | |
| if key in self.cache: | |
| self.cache.pop(key) | |
| self.timestamps.pop(key, None) | |
| return True | |
| return False | |
| def clear(self) -> None: | |
| """ | |
| Clear entire cache | |
| """ | |
| self.cache.clear() | |
| self.timestamps.clear() | |
| def _is_expired(self, key: str) -> bool: | |
| """ | |
| Check if item has expired | |
| """ | |
| if key not in self.timestamps: | |
| return True | |
| return ((time.time() - self.timestamps[key]) > self.ttl) | |
| def size(self) -> int: | |
| """ | |
| Get current cache size | |
| """ | |
| return len(self.cache) | |
| def keys(self) -> list: | |
| """ | |
| Get all cache keys | |
| """ | |
| return list(self.cache.keys()) | |
| class EmbeddingCache: | |
| """ | |
| Specialized cache for embeddings with serialization support | |
| """ | |
| def __init__(self, max_size: int = 1000, ttl: int = 86400, auto_persist: bool = True, persist_path: Optional[str] = None): | |
| self.cache = LRUCache(max_size = max_size, | |
| ttl = ttl, | |
| ) | |
| self.hits = 0 | |
| self.misses = 0 | |
| self.auto_persist = auto_persist | |
| self.persist_path = persist_path or "cache/embeddings.pkl" | |
| # Ensure cache directory exists | |
| cache_dir = Path(self.persist_path).parent | |
| cache_dir.mkdir(parents = True, exist_ok = True) | |
| # Load cache on startup if exists | |
| if (auto_persist and Path(self.persist_path).exists()): | |
| self.load_from_file(self.persist_path) | |
| def get_embedding(self, text: str) -> Optional[list]: | |
| """ | |
| Get embedding for text | |
| """ | |
| key = self._generate_key(text) | |
| result = self.cache.get(key) | |
| if result is not None: | |
| self.hits += 1 | |
| else: | |
| self.misses += 1 | |
| return result | |
| def set_embedding(self, text: str, embedding: list) -> None: | |
| """ | |
| Set embedding for text | |
| """ | |
| key = self._generate_key(text) | |
| self.cache.set(key, embedding) | |
| def _generate_key(self, text: str) -> str: | |
| """ | |
| Generate cache key from text | |
| """ | |
| return f"emb_{hash(text) & 0xFFFFFFFF}" | |
| def get_stats(self) -> Dict[str, Any]: | |
| """ | |
| Get cache statistics | |
| """ | |
| total = self.hits + self.misses | |
| hit_rate = (self.hits / total * 100) if (total > 0) else 0 | |
| return {"hits" : self.hits, | |
| "misses" : self.misses, | |
| "hit_rate" : hit_rate, | |
| "size" : self.cache.size(), | |
| "max_size" : self.cache.max_size, | |
| } | |
| def save_to_file(self, file_path: str) -> bool: | |
| """ | |
| Save cache to file | |
| """ | |
| try: | |
| # Ensure directory exists | |
| Path(file_path).parent.mkdir(parents = True, exist_ok = True) | |
| with open(file_path, 'wb') as f: | |
| pickle.dump({'cache' : self.cache.cache, | |
| 'timestamps' : self.cache.timestamps, | |
| 'hits' : self.hits, | |
| 'misses' : self.misses, | |
| }, | |
| f | |
| ) | |
| return True | |
| except Exception as e: | |
| logger.error(f"Failed to save cache: {repr(e)}") | |
| return False | |
| def load_from_file(self, file_path: str) -> bool: | |
| """ | |
| Load cache from file | |
| """ | |
| try: | |
| with open(file_path, 'rb') as f: | |
| data = pickle.load(f) | |
| self.cache.cache = data['cache'] | |
| self.cache.timestamps = data['timestamps'] | |
| self.hits = data.get('hits', 0) | |
| self.misses = data.get('misses', 0) | |
| return True | |
| except Exception as e: | |
| logger.error(f"Failed to load cache: {repr(e)}") | |
| return False | |
| def __del__(self): | |
| """ | |
| Auto-save cache on destruction | |
| """ | |
| if self.auto_persist: | |
| self.save_to_file(self.persist_path) | |
| # Global cache instances | |
| embedding_cache = EmbeddingCache(max_size = settings.CACHE_MAX_SIZE, | |
| ttl = settings.CACHE_TTL, | |
| ) | |
| # Convenience functions | |
| def get_embedding_cache() -> EmbeddingCache: | |
| """ | |
| Get global embedding cache instance | |
| """ | |
| return embedding_cache | |
| def clear_embedding_cache() -> None: | |
| """ | |
| Clear embedding cache | |
| """ | |
| embedding_cache.cache.clear() | |
| embedding_cache.hits = 0 | |
| embedding_cache.misses = 0 |