File size: 3,878 Bytes
a1bf219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
"""Cache storage implementation (in-memory and file-based)."""

import json
import logging
import os
from pathlib import Path
from typing import Any, Dict, Optional

# Module-level logger named after this module; CacheStore uses it to report
# cache file read/write/delete failures instead of raising.
logger = logging.getLogger(__name__)


class CacheStore:
    """Two-tier cache: an in-process dict backed by one JSON file per key.

    Values are kept unchanged in ``memory_cache`` and are additionally
    written to ``cache_dir`` as JSON so they can be read back by a later
    instance.  The on-disk copy is the JSON form produced by
    ``_make_serializable`` plus ``json.dumps(..., default=str)``, and ``get``
    returns that form as loaded when it falls back to disk.  A value read
    from disk can therefore differ in type from the object originally
    stored (a DataFrame comes back as a tagged dict, a datetime as a
    string, a tuple as a list).

    File I/O problems are logged through the module logger and never raised
    from ``get``, ``set``, ``delete`` or ``clear``.
    """

    def __init__(self, cache_dir: str = "data/cache"):
        """
        Initialize cache store.

        Creates ``cache_dir`` (including parents) if it does not exist.

        Args:
            cache_dir: Directory for file-based cache
        """
        self.memory_cache: Dict[str, Any] = {}
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _get_file_path(self, key: str) -> Path:
        """
        Return the JSON file path used to persist ``key``.

        ``:`` and ``/`` are replaced with ``_`` so that namespaced keys map
        to a single file name directly inside ``cache_dir``.  The mapping is
        not one-to-one: keys that differ only in those characters
        (``"a:b"``, ``"a/b"``, ``"a_b"``) share the same file.

        Args:
            key: Cache key

        Returns:
            Path of the ``.json`` file for this key
        """
        safe_key = key.replace(":", "_").replace("/", "_")
        return self.cache_dir / f"{safe_key}.json"

    @staticmethod
    def _remove_file(file_path: Path) -> None:
        """Delete ``file_path``; a file that is already gone is not an error."""
        try:
            file_path.unlink()
        except FileNotFoundError:
            pass
        except (OSError, ValueError) as e:
            # ValueError covers paths the OS rejects outright (e.g. NUL bytes).
            logger.error("Failed to delete cache file %s: %s", file_path, e)

    def get(self, key: str) -> Optional[Any]:
        """
        Get value from cache.

        The memory cache is consulted first.  On a miss the key's JSON file
        is loaded, stored in the memory cache and returned in its JSON form.

        Args:
            key: Cache key

        Returns:
            Cached value, or None when the key is absent or its file cannot
            be read or parsed
        """
        if key in self.memory_cache:
            return self.memory_cache[key]

        file_path = self._get_file_path(key)
        try:
            # Open directly instead of checking exists() first: the file may
            # disappear between the check and the open.
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            return None
        except (OSError, ValueError) as e:
            # json.JSONDecodeError and UnicodeDecodeError are ValueErrors.
            logger.error("Failed to read cache file %s: %s", file_path, e)
            return None

        # Promote to memory so the next lookup skips the disk.
        self.memory_cache[key] = data
        return data

    def set(self, key: str, value: Any) -> None:
        """
        Set value in cache.

        The value is always stored in memory.  Persisting it is best-effort:
        the value is serialized first and then moved into place with an
        atomic rename, so a failure never leaves a truncated JSON file.  If
        persisting fails, the error is logged and any older file for the key
        is removed so that a stale value cannot be served from disk later.

        Args:
            key: Cache key
            value: Value to cache
        """
        self.memory_cache[key] = value

        file_path = self._get_file_path(key)
        # Sibling temp file (same directory => same filesystem for replace).
        # It does not end in ".json", so clear() and get() never see it.
        tmp_path = Path(f"{file_path}.{os.getpid()}.tmp")
        try:
            # Serialize before touching the filesystem so an unserializable
            # value cannot clobber an existing file with partial output.
            payload = json.dumps(self._make_serializable(value), default=str)
            with open(tmp_path, "w", encoding="utf-8") as f:
                f.write(payload)
            os.replace(tmp_path, file_path)
        except Exception as e:
            # Deliberately broad: arbitrary user objects are serialized here
            # (str()/to_dict() may raise anything) and caching must not
            # break the caller.
            logger.error("Failed to write cache file %s: %s", file_path, e)
            self._remove_file(tmp_path)
            self._remove_file(file_path)

    def delete(self, key: str) -> None:
        """
        Delete value from cache.

        Removes the key from memory and deletes its file.  Deleting a key
        that is not cached is a no-op.

        Args:
            key: Cache key
        """
        self.memory_cache.pop(key, None)
        self._remove_file(self._get_file_path(key))

    def clear(self) -> None:
        """
        Clear all cache.

        Empties the memory cache and deletes every ``*.json`` file directly
        inside ``cache_dir``.  Each file is handled independently, so one
        file that cannot be deleted does not prevent removal of the rest.
        """
        self.memory_cache.clear()

        try:
            # Materialize the listing before deleting from the directory.
            cached_files = list(self.cache_dir.glob("*.json"))
        except OSError as e:
            logger.error("Failed to clear cache directory: %s", e)
            return

        for file_path in cached_files:
            self._remove_file(file_path)

    def _make_serializable(self, obj: Any) -> Any:
        """
        Convert object to JSON-serializable format.

        Conversions, applied recursively through dict values and list/tuple
        items:

        * pandas ``DataFrame`` -> ``{"__type__": "DataFrame", "data":
          <records>}`` (only when pandas can be imported)
        * ``datetime`` -> ISO 8601 string
        * ``tuple`` -> ``list``

        Anything else is returned unchanged and left to ``json.dumps``.

        Args:
            obj: Value to convert

        Returns:
            A structure in which the handled types have been replaced
        """
        from datetime import datetime

        # pandas is optional: without it there are no DataFrames to convert,
        # and plain values must still be cacheable.
        try:
            import pandas as pd
        except ImportError:
            frame_type = None
        else:
            frame_type = pd.DataFrame

        def convert(value: Any) -> Any:
            if frame_type is not None and isinstance(value, frame_type):
                return {
                    "__type__": "DataFrame",
                    "data": value.to_dict(orient="records"),
                }
            if isinstance(value, datetime):
                return value.isoformat()
            if isinstance(value, dict):
                return {k: convert(v) for k, v in value.items()}
            if isinstance(value, (list, tuple)):
                return [convert(item) for item in value]
            return value

        return convert(obj)