Spaces:
Running
Running
File size: 4,237 Bytes
2ed2bd7 29ed661 2ed2bd7 29ed661 2ed2bd7 29ed661 2ed2bd7 29ed661 2ed2bd7 29ed661 2ed2bd7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
"""
Simple in-memory cache with TTL for V3 web scraping API.
"""
import time
from threading import Lock
from typing import Any
from app.core.logging import get_logger
logger = get_logger(__name__)
class SimpleCache:
    """Thread-safe in-memory cache with TTL-based expiration.

    Entries are stored as ``{"data": ..., "expiry": ..., "created": ...}``
    and lazily expired on read. When the cache is full, the entry with the
    earliest expiry (i.e. the oldest insert, since TTL is fixed) is evicted.
    """

    def __init__(self, ttl_seconds: int = 3600, max_size: int = 1000):
        """
        Initialize cache with TTL and max size.

        Args:
            ttl_seconds: Time-to-live for cache entries in seconds (default: 1 hour)
            max_size: Maximum number of entries to store (default: 1000)
        """
        self._cache: dict[str, dict[str, Any]] = {}
        self._lock = Lock()
        self._ttl = ttl_seconds
        self._max_size = max_size
        self._hits = 0
        self._misses = 0
        logger.info(f"Cache initialized with TTL={ttl_seconds}s, max_size={max_size}")

    def get(self, key: str) -> dict[str, Any] | None:
        """
        Get cached content for key.

        Args:
            key: Cache key (typically a URL)

        Returns:
            Cached data if found and not expired, None otherwise
        """
        with self._lock:
            if key not in self._cache:
                self._misses += 1
                return None
            entry = self._cache[key]
            # Lazy expiration: drop the entry at read time if its TTL passed.
            if time.time() > entry["expiry"]:
                del self._cache[key]
                self._misses += 1
                logger.debug(f"Cache expired for key: {key[:50]}...")
                return None
            self._hits += 1
            logger.debug(f"Cache hit for key: {key[:50]}...")
            return entry["data"]

    def set(self, key: str, data: dict[str, Any]) -> None:
        """
        Cache content with TTL.

        Args:
            key: Cache key (typically a URL)
            data: Data to cache
        """
        with self._lock:
            # Evict only when inserting a NEW key at capacity. Overwriting an
            # existing key does not grow the cache, so evicting in that case
            # would needlessly discard an unrelated, still-valid entry.
            if key not in self._cache and len(self._cache) >= self._max_size:
                # With a fixed TTL the smallest expiry is the oldest insert.
                oldest_key = min(
                    self._cache, key=lambda k: self._cache[k]["expiry"]
                )
                del self._cache[oldest_key]
                logger.debug(f"Cache full, removed oldest entry: {oldest_key[:50]}...")
            now = time.time()
            self._cache[key] = {
                "data": data,
                "expiry": now + self._ttl,
                "created": now,
            }
            logger.debug(f"Cached key: {key[:50]}...")

    def clear_expired(self) -> int:
        """
        Remove all expired entries from cache.

        Returns:
            Number of entries removed
        """
        with self._lock:
            current_time = time.time()
            expired_keys = [
                key
                for key, entry in self._cache.items()
                if current_time > entry["expiry"]
            ]
            for key in expired_keys:
                del self._cache[key]
            if expired_keys:
                logger.info(f"Cleared {len(expired_keys)} expired cache entries")
            return len(expired_keys)

    def clear_all(self) -> None:
        """Clear all cache entries and reset hit/miss counters."""
        with self._lock:
            count = len(self._cache)
            self._cache.clear()
            self._hits = 0
            self._misses = 0
            logger.info(f"Cleared all {count} cache entries")

    def stats(self) -> dict[str, int | float]:
        """
        Get cache statistics.

        Returns:
            Dictionary with cache metrics (``hit_rate`` is a float percentage;
            all other values are ints).
        """
        with self._lock:
            total_requests = self._hits + self._misses
            hit_rate = (
                (self._hits / total_requests * 100) if total_requests > 0 else 0.0
            )
            return {
                "size": len(self._cache),
                "max_size": self._max_size,
                "hits": self._hits,
                "misses": self._misses,
                "hit_rate": round(hit_rate, 2),
                "ttl_seconds": self._ttl,
            }
# Module-level singleton cache for scraped page content,
# shared by all importers of this module: 1-hour TTL, at most 1000 entries.
scraping_cache = SimpleCache(ttl_seconds=3600, max_size=1000)
|