Spaces:
Running
Running
File size: 3,878 Bytes
a1bf219 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
"""Cache storage implementation (in-memory and file-based)."""
import hashlib
import json
import logging
import os
import re
import tempfile
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
logger = logging.getLogger(__name__)
class CacheStore:
    """Two-tier cache: an in-process dict backed by JSON files on disk.

    Values are kept as-is in memory and additionally written to one JSON
    file per key so they survive a restart.

    NOTE: the on-disk copy is a JSON rendering of the value, not the value
    itself. After a reload from disk, ``get`` returns that rendering:
    DataFrames come back as ``{"__type__": "DataFrame", "data": [...]}``,
    datetimes as ISO-8601 strings, tuples as lists, and any other
    non-JSON type as its ``str()`` form.
    """

    # Anything outside this conservative set is replaced in the readable
    # part of a cache file name, so the name is valid on every filesystem.
    _UNSAFE_FILENAME_CHARS = re.compile(r"[^A-Za-z0-9._-]")
    # Cap on the readable prefix; keeps file names far below the usual
    # 255-byte limit no matter how long the cache key is.
    _MAX_PREFIX_LEN = 64
    # Number of hex digits of the key's SHA-256 kept in the file name.
    _DIGEST_LEN = 16

    def __init__(self, cache_dir: str = "data/cache"):
        """
        Initialize cache store.

        Args:
            cache_dir: Directory for file-based cache. Created (including
                parents) if it does not exist yet.
        """
        self.memory_cache: Dict[str, Any] = {}
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _get_file_path(self, key: str) -> Path:
        """
        Get file path for cache key.

        The name is a sanitized, length-limited prefix of the key (for
        readability) followed by a digest of the full key. The digest is
        what makes the mapping safe: keys that sanitize to the same text
        (``"a:b"``, ``"a/b"``, ``"a_b"``) still get different files, and
        arbitrarily long keys still yield a short file name.

        Files written under an older naming scheme are simply not found
        (a cache miss); ``clear`` still removes them.

        Args:
            key: Cache key

        Returns:
            Path of the JSON file that backs ``key``
        """
        digest = hashlib.sha256(key.encode("utf-8", "surrogatepass")).hexdigest()
        prefix = self._UNSAFE_FILENAME_CHARS.sub("_", key)[: self._MAX_PREFIX_LEN]
        return self.cache_dir / f"{prefix}_{digest[: self._DIGEST_LEN]}.json"

    def get(self, key: str) -> Optional[Any]:
        """
        Get value from cache.

        Args:
            key: Cache key

        Returns:
            Cached value, or None when the key is unknown or its cache
            file cannot be read. A value loaded from disk is the JSON
            rendering of what was stored (see the class docstring).
        """
        # Try memory cache first
        try:
            return self.memory_cache[key]
        except KeyError:
            pass

        # Fall back to the file cache. Opening directly (instead of
        # checking exists() first) avoids a race with concurrent deletes.
        file_path = self._get_file_path(key)
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return None
        except Exception as e:
            logger.error("Failed to read cache file %s: %s", file_path, e)
            return None

        # Restore to memory cache so the next lookup skips the disk
        self.memory_cache[key] = data
        return data

    def set(self, key: str, value: Any):
        """
        Set value in cache.

        The value is always stored in memory. Persisting it to disk is
        best-effort: failures are logged, never raised. The file is
        replaced atomically, so readers never observe a half-written
        file, and on failure the on-disk entry is removed rather than
        left holding an older value than the one in memory.

        Args:
            key: Cache key
            value: Value to cache
        """
        # Store in memory
        self.memory_cache[key] = value

        # Store in file (for persistence)
        file_path = self._get_file_path(key)
        tmp_path = None
        try:
            # Serialize before touching the disk: a value that cannot be
            # encoded must not truncate or corrupt an existing file.
            payload = json.dumps(self._make_serializable(value), default=str)
            fd, tmp_path = tempfile.mkstemp(
                dir=str(self.cache_dir), prefix=".cache-", suffix=".tmp"
            )
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(payload)
            os.replace(tmp_path, file_path)
            tmp_path = None  # ownership moved to file_path
        except Exception as e:
            logger.error("Failed to write cache file %s: %s", file_path, e)
            self._remove_quietly(file_path)
        finally:
            if tmp_path is not None:
                self._remove_quietly(tmp_path)

    def delete(self, key: str):
        """
        Delete value from cache.

        Deleting a key that is not cached is a no-op.

        Args:
            key: Cache key
        """
        # Remove from memory
        self.memory_cache.pop(key, None)

        # Remove file
        file_path = self._get_file_path(key)
        try:
            file_path.unlink()
        except FileNotFoundError:
            pass
        except Exception as e:
            logger.error("Failed to delete cache file %s: %s", file_path, e)

    def clear(self):
        """Clear all cache.

        Empties the memory cache and removes every ``*.json`` file in the
        cache directory. A file that cannot be removed is logged and
        skipped so the remaining files are still cleared.
        """
        # Clear memory
        self.memory_cache.clear()

        # Clear files
        try:
            cache_files = list(self.cache_dir.glob("*.json"))
        except Exception as e:
            logger.error("Failed to clear cache directory: %s", e)
            return

        for file_path in cache_files:
            try:
                file_path.unlink()
            except FileNotFoundError:
                continue  # already gone, e.g. removed by another process
            except Exception as e:
                logger.error("Failed to delete cache file %s: %s", file_path, e)

    def _make_serializable(self, obj: Any) -> Any:
        """
        Convert object to JSON-serializable format.

        DataFrames become a tagged dict of records, datetimes become
        ISO-8601 strings, and dicts, lists and tuples are converted
        recursively. Anything else is returned unchanged and left to the
        JSON encoder (``set`` passes ``default=str`` as a last resort).

        Args:
            obj: Value to convert

        Returns:
            A structure the ``json`` module can encode
        """
        # pandas is optional: without it no value can be a DataFrame, and
        # plain values must still be cacheable.
        try:
            import pandas as pd
        except ImportError:
            pd = None

        if pd is not None and isinstance(obj, pd.DataFrame):
            return {
                "__type__": "DataFrame",
                "data": obj.to_dict(orient="records"),
            }
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, dict):
            return {k: self._make_serializable(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            # JSON has no tuple type; recurse so nested values are
            # converted the same way as inside a list.
            return [self._make_serializable(item) for item in obj]
        return obj

    @staticmethod
    def _remove_quietly(path) -> None:
        """Remove ``path`` if possible; cleanup failures are ignored."""
        try:
            os.unlink(path)
        except OSError:
            pass
|