import json
import time
import hashlib
import logging
from typing import Dict, Any, Optional, List
from pathlib import Path
from threading import Lock

from cachetools import TTLCache, LRUCache

from app.utils.config import Config

logger = logging.getLogger(__name__)

class CacheService:
    """Service for caching LinkedIn search results and profile data"""

    def __init__(self):
        self.cache_enabled = Config.CACHE_ENABLED
        self.cache_ttl = Config.CACHE_TTL
        self.cache_max_size = Config.CACHE_MAX_SIZE
        self.cache_file_path = Config.CACHE_FILE_PATH

        # Initialize caches
        self._init_caches()

        # Thread safety
        self._lock = Lock()

        logger.info(
            f"🔧 Cache service initialized - Enabled: {self.cache_enabled}, "
            f"TTL: {self.cache_ttl}s, Max Size: {self.cache_max_size}"
        )

    def _init_caches(self):
        """Initialize the different types of caches"""
        if not self.cache_enabled:
            self.search_cache = None
            self.profile_cache = None
            self.query_cache = None
            return

        # TTL cache for search results (entries expire after the TTL)
        self.search_cache = TTLCache(
            maxsize=self.cache_max_size,
            ttl=self.cache_ttl
        )

        # TTL cache for individual profile data (longer TTL, since profiles
        # change less often than search results)
        self.profile_cache = TTLCache(
            maxsize=self.cache_max_size * 2,  # More space for profiles
            ttl=self.cache_ttl * 2  # Longer TTL for profile data
        )

        # LRU cache for query results (no TTL, just a size limit)
        self.query_cache = LRUCache(
            maxsize=self.cache_max_size // 2
        )

        # Load persistent cache from file
        self._load_persistent_cache()

        logger.info("✅ Caches initialized successfully")

    def _load_persistent_cache(self):
        """Load cache data from persistent storage"""
        try:
            cache_file = Path(self.cache_file_path)
            if cache_file.exists():
                with open(cache_file, 'r') as f:
                    cache_data = json.load(f)

                # Load search cache (validated against the base TTL)
                if 'search_cache' in cache_data:
                    for key, value in cache_data['search_cache'].items():
                        if self._is_cache_entry_valid(value):
                            self.search_cache[key] = value['data']

                # Load profile cache (validated against its longer TTL,
                # matching the in-memory profile_cache configuration)
                if 'profile_cache' in cache_data:
                    for key, value in cache_data['profile_cache'].items():
                        if self._is_cache_entry_valid(value, ttl=self.cache_ttl * 2):
                            self.profile_cache[key] = value['data']

                logger.info(f"📁 Loaded persistent cache from {cache_file}")
            else:
                logger.info("📁 No existing cache file found, starting fresh")
        except Exception as e:
            logger.warning(f"⚠️ Failed to load persistent cache: {str(e)}")

    def _save_persistent_cache(self):
        """Save cache data to persistent storage"""
        if not self.cache_enabled:
            return

        try:
            cache_file = Path(self.cache_file_path)
            cache_file.parent.mkdir(parents=True, exist_ok=True)

            cache_data = {
                'search_cache': {},
                'profile_cache': {},
                'timestamp': time.time()
            }

            # Save search cache
            if self.search_cache is not None:
                for key, value in self.search_cache.items():
                    cache_data['search_cache'][key] = {
                        'data': value,
                        'timestamp': time.time()
                    }

            # Save profile cache
            if self.profile_cache is not None:
                for key, value in self.profile_cache.items():
                    cache_data['profile_cache'][key] = {
                        'data': value,
                        'timestamp': time.time()
                    }

            with open(cache_file, 'w') as f:
                json.dump(cache_data, f, indent=2)

            logger.info(f"💾 Saved persistent cache to {cache_file}")
        except Exception as e:
            logger.warning(f"⚠️ Failed to save persistent cache: {str(e)}")

    def _is_cache_entry_valid(self, entry: Dict, ttl: Optional[float] = None) -> bool:
        """Check if a persisted cache entry is still valid (not expired)"""
        if not isinstance(entry, dict) or 'timestamp' not in entry:
            return False
        timestamp = entry.get('timestamp', 0)
        # Validate against the base TTL unless the caller supplies another
        # (the profile cache uses a longer one).
        return (time.time() - timestamp) < (ttl if ttl is not None else self.cache_ttl)

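    # Example (assuming Config.CACHE_TTL = 3600): an entry persisted 5000 s
    # ago fails the default check but passes when revalidated for the
    # profile cache with ttl=7200.
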
    def _generate_cache_key(self, *args, **kwargs) -> str:
        """Generate a unique cache key from function arguments"""
        # Build a string representation of the positional arguments
        key_parts = [str(arg) for arg in args]

        # Add keyword arguments (sorted for a stable ordering)
        for key, value in sorted(kwargs.items()):
            key_parts.append(f"{key}:{value}")

        # Hash the combined string (MD5 is used only as a key digest here,
        # not for security)
        key_string = "|".join(key_parts)
        return hashlib.md5(key_string.encode()).hexdigest()

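    # Worked example (hypothetical arguments):
    #   _generate_cache_key("search", "ML engineer", location="NYC", max_results=10)
    # sorts the kwargs, joins the parts into
    #   "search|ML engineer|location:NYC|max_results:10",
    # and returns that string's MD5 hex digest, so equivalent calls always
    # map to the same key.
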
    def get_search_results(
        self,
        job_description: str,
        location: Optional[str] = None,
        max_results: int = 10
    ) -> Optional[List[Dict]]:
        """Get cached search results for a job description"""
        # Use an explicit None check: an empty-but-initialized cache is falsy,
        # so `not self.search_cache` would wrongly bail out here.
        if not self.cache_enabled or self.search_cache is None:
            return None

        cache_key = self._generate_cache_key(
            "search",
            job_description,
            location or "any",
            max_results
        )

        with self._lock:
            try:
                results = self.search_cache.get(cache_key)
                if results is not None:
                    logger.info(f"🎯 Cache HIT for search: {job_description[:50]}...")
                    return results
                else:
                    logger.info(f"❌ Cache MISS for search: {job_description[:50]}...")
                    return None
            except Exception as e:
                logger.warning(f"⚠️ Error accessing search cache: {str(e)}")
                return None

    def set_search_results(
        self,
        job_description: str,
        location: Optional[str] = None,
        max_results: int = 10,
        results: Optional[List[Dict]] = None
    ):
        """Cache search results for a job description"""
        if not self.cache_enabled or self.search_cache is None or not results:
            return

        cache_key = self._generate_cache_key(
            "search",
            job_description,
            location or "any",
            max_results
        )

        with self._lock:
            try:
                self.search_cache[cache_key] = results
                logger.info(f"💾 Cached search results for: {job_description[:50]}...")

                # Periodically save to persistent storage
                if len(self.search_cache) % 10 == 0:  # Save every 10 entries
                    self._save_persistent_cache()
            except Exception as e:
                logger.warning(f"⚠️ Error caching search results: {str(e)}")

    def get_profile_data(self, profile_url: str) -> Optional[Dict]:
        """Get cached profile data for a LinkedIn profile URL"""
        if not self.cache_enabled or self.profile_cache is None:
            return None

        cache_key = self._generate_cache_key("profile", profile_url)

        with self._lock:
            try:
                profile_data = self.profile_cache.get(cache_key)
                if profile_data is not None:
                    logger.info(f"🎯 Cache HIT for profile: {profile_url}")
                    return profile_data
                else:
                    logger.info(f"❌ Cache MISS for profile: {profile_url}")
                    return None
            except Exception as e:
                logger.warning(f"⚠️ Error accessing profile cache: {str(e)}")
                return None

    def set_profile_data(self, profile_url: str, profile_data: Dict):
        """Cache profile data for a LinkedIn profile URL"""
        if not self.cache_enabled or self.profile_cache is None or not profile_data:
            return

        cache_key = self._generate_cache_key("profile", profile_url)

        with self._lock:
            try:
                self.profile_cache[cache_key] = profile_data
                logger.info(f"💾 Cached profile data for: {profile_url}")

                # Periodically save to persistent storage
                if len(self.profile_cache) % 20 == 0:  # Save every 20 entries
                    self._save_persistent_cache()
            except Exception as e:
                logger.warning(f"⚠️ Error caching profile data: {str(e)}")

    def get_query_results(self, query: str, max_results: int = 10) -> Optional[List[Dict]]:
        """Get cached Google search query results"""
        if not self.cache_enabled or self.query_cache is None:
            return None

        cache_key = self._generate_cache_key("query", query, max_results)

        with self._lock:
            try:
                results = self.query_cache.get(cache_key)
                if results is not None:
                    logger.info(f"🎯 Cache HIT for query: {query[:50]}...")
                    return results
                else:
                    logger.info(f"❌ Cache MISS for query: {query[:50]}...")
                    return None
            except Exception as e:
                logger.warning(f"⚠️ Error accessing query cache: {str(e)}")
                return None

    def set_query_results(self, query: str, max_results: int = 10, results: Optional[List[Dict]] = None):
        """Cache Google search query results"""
        if not self.cache_enabled or self.query_cache is None or not results:
            return

        cache_key = self._generate_cache_key("query", query, max_results)

        with self._lock:
            try:
                self.query_cache[cache_key] = results
                logger.info(f"💾 Cached query results for: {query[:50]}...")
            except Exception as e:
                logger.warning(f"⚠️ Error caching query results: {str(e)}")

    def clear_cache(self, cache_type: str = "all"):
        """Clear the specified cache, or all caches"""
        with self._lock:
            try:
                if cache_type in ("all", "search"):
                    if self.search_cache is not None:
                        self.search_cache.clear()
                        logger.info("🧹 Cleared search cache")

                if cache_type in ("all", "profile"):
                    if self.profile_cache is not None:
                        self.profile_cache.clear()
                        logger.info("🧹 Cleared profile cache")

                if cache_type in ("all", "query"):
                    if self.query_cache is not None:
                        self.query_cache.clear()
                        logger.info("🧹 Cleared query cache")

                # Save the now-empty caches to persistent storage
                self._save_persistent_cache()
            except Exception as e:
                logger.warning(f"⚠️ Error clearing cache: {str(e)}")

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get statistics about cache usage"""
        stats = {
            'cache_enabled': self.cache_enabled,
            'cache_ttl': self.cache_ttl,
            'cache_max_size': self.cache_max_size
        }

        if self.cache_enabled:
            stats.update({
                'search_cache_size': len(self.search_cache) if self.search_cache is not None else 0,
                'profile_cache_size': len(self.profile_cache) if self.profile_cache is not None else 0,
                'query_cache_size': len(self.query_cache) if self.query_cache is not None else 0,
                'search_cache_currsize': self.search_cache.currsize if self.search_cache is not None else 0,
                'profile_cache_currsize': self.profile_cache.currsize if self.profile_cache is not None else 0,
                'query_cache_currsize': self.query_cache.currsize if self.query_cache is not None else 0
            })

        return stats

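    # Example return value with caching enabled (illustrative numbers):
    #   {'cache_enabled': True, 'cache_ttl': 3600, 'cache_max_size': 100,
    #    'search_cache_size': 12, 'profile_cache_size': 30, 'query_cache_size': 5,
    #    'search_cache_currsize': 12, 'profile_cache_currsize': 30,
    #    'query_cache_currsize': 5}
    # With cachetools' default getsizeof, currsize equals len(), so the
    # *_size and *_currsize pairs report the same counts.
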
    def cleanup_expired_entries(self):
        """Clean up expired entries from the TTL caches"""
        if not self.cache_enabled:
            return

        with self._lock:
            try:
                # TTLCache evicts expired entries lazily on access; calling
                # expire() removes them eagerly. The LRU query cache has no
                # TTL, so there is nothing to expire there.
                if self.search_cache is not None:
                    self.search_cache.expire()
                if self.profile_cache is not None:
                    self.profile_cache.expire()
                logger.info("🧹 Cleaned up expired cache entries")
            except Exception as e:
                logger.warning(f"⚠️ Error cleaning up expired entries: {str(e)}")

    def __del__(self):
        """Best-effort save when the cache service is garbage-collected"""
        try:
            self._save_persistent_cache()
        except Exception:
            pass  # Interpreter may be shutting down; ignore cleanup errors
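
# Minimal smoke test (a sketch, not part of the service API). It assumes
# app.utils.config.Config supplies CACHE_ENABLED=True and a writable
# CACHE_FILE_PATH; the profile URL and payload below are hypothetical.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    cache = CacheService()
    cache.set_profile_data(
        "https://www.linkedin.com/in/example",
        {"name": "Jane Doe", "headline": "ML Engineer"},
    )
    # A second lookup should hit the cache rather than miss.
    print(cache.get_profile_data("https://www.linkedin.com/in/example"))
    print(cache.get_cache_stats())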