File size: 3,439 Bytes
e26d9d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import hashlib
import os
from dataclasses import dataclass

import pandas as pd


def _project_root() -> str:
    """Return the parent of the directory that contains this module file."""
    module_file = os.path.abspath(__file__)
    module_dir = os.path.dirname(module_file)
    return os.path.dirname(module_dir)


def cache_root() -> str:
    """Return the cache directory for this app, creating it if necessary.

    The location is chosen in this order:

    1. ``$CACHE_DIR/panel_app_v2`` when the ``CACHE_DIR`` environment
       variable is set to a non-empty value.
    2. ``/tmp/panel_app_v2_cache`` when ``/tmp`` exists and is writable
       (the expected case on Hugging Face Spaces / Linux containers).
    3. ``<project root>/.cache/panel_app_v2`` otherwise (local development).

    Returns:
        The path of the (now existing) cache directory.
    """

    def _choose_location() -> str:
        # An explicit override from the environment always wins.
        override = os.environ.get("CACHE_DIR")
        if override:
            return os.path.join(override, "panel_app_v2")

        # Otherwise prefer the system temp directory when it is usable.
        tmp_dir = "/tmp"
        if os.path.exists(tmp_dir) and os.access(tmp_dir, os.W_OK):
            return os.path.join(tmp_dir, "panel_app_v2_cache")

        # Last resort: keep the cache next to the project sources.
        return os.path.join(_project_root(), ".cache", "panel_app_v2")

    location = _choose_location()
    os.makedirs(location, exist_ok=True)
    return location


def _safe_str(value: object) -> str:
    """Convert *value* to ``str`` without ever raising.

    Any falsy value (``None``, ``""``, ``0``, empty containers, ...) becomes
    the empty string, and so does any value whose truth test or ``str()``
    conversion raises.
    """
    try:
        if not value:
            return ""
        return str(value)
    except Exception:
        # Best effort: a value that cannot be rendered is treated as empty.
        return ""


def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
    """Return a 32-character hex BLAKE2b fingerprint of a file and its labels.

    The digest is fed, in order, with the raw file bytes, the file name and
    the ``extra`` string; empty parts are skipped. Text parts are encoded as
    UTF-8, dropping characters that cannot be encoded.

    NOTE(review): the parts are hashed back to back with no separator, so the
    boundary between them is not part of the fingerprint.

    Args:
        file_bytes: Raw content to fingerprint (may be empty).
        filename: Optional name mixed into the fingerprint.
        extra: Optional additional text mixed into the fingerprint.

    Returns:
        The hexadecimal digest (16-byte digest, 32 hex characters).
    """
    digest = hashlib.blake2b(digest_size=16)
    if file_bytes:
        digest.update(file_bytes)
    # File name first, then the extra text — the order affects the digest.
    for part in (filename, extra):
        text = _safe_str(part)
        if text:
            digest.update(text.encode("utf-8", errors="ignore"))
    return digest.hexdigest()


def _has_pyarrow() -> bool:
    """Report whether ``pyarrow`` can be imported in this environment."""
    try:
        import pyarrow  # noqa: F401
    except Exception:
        # Any import-time failure counts as "not available".
        return False
    return True


def _has_duckdb() -> bool:
    """Report whether ``duckdb`` can be imported in this environment."""
    try:
        import duckdb  # noqa: F401
    except Exception:
        # Any import-time failure counts as "not available".
        return False
    return True


def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
    """Persist *df* next to *path_no_ext* and return the file actually written.

    Parquet (without the index) is used when ``pyarrow`` is importable;
    otherwise the frame is pickled.

    Args:
        df: The table to store.
        path_no_ext: Destination path without a file extension.

    Returns:
        ``path_no_ext`` with ``.parquet`` or ``.pkl`` appended, depending on
        the format that was used.
    """
    if not _has_pyarrow():
        # No parquet engine available: fall back to pandas' pickle format.
        target = path_no_ext + ".pkl"
        df.to_pickle(target)
        return target

    target = path_no_ext + ".parquet"
    df.to_parquet(target, index=False)
    return target


def read_table(path: str) -> pd.DataFrame:
    """Load a table previously stored as parquet or pickle.

    Args:
        path: File to read. A name ending in ``.parquet`` (case-insensitive)
            is read as parquet; anything else is read as a pickle.

    Returns:
        The stored frame, or an empty ``DataFrame`` when *path* is empty or
        does not exist.

    NOTE(review): unpickling can execute arbitrary code, so this should only
    be pointed at files the application wrote itself.
    """
    if path and os.path.exists(path):
        is_parquet = str(path).lower().endswith(".parquet")
        loader = pd.read_parquet if is_parquet else pd.read_pickle
        return loader(path)
    return pd.DataFrame()


@dataclass(frozen=True)
class CachedDataset:
    """Immutable key that locates one dataset's files inside the cache.

    The three fields become nested path segments under ``cache_root()``:
    ``<dataset_id>/rat=<rat>/granularity=<granularity>``.
    """

    # Identifier of the dataset; used verbatim as the first directory level.
    dataset_id: str
    # Partition label (presumably the radio access technology — confirm with callers).
    rat: str
    # Partition label for the aggregation granularity of the cached data.
    granularity: str

    def base_dir(self) -> str:
        """Return the directory holding this dataset's cached files."""
        root = cache_root()
        rat_segment = f"rat={self.rat}"
        granularity_segment = f"granularity={self.granularity}"
        return os.path.join(root, self.dataset_id, rat_segment, granularity_segment)

    def daily_table_base(self) -> str:
        """Return the extension-less path of the cached daily table."""
        directory = self.base_dir()
        return os.path.join(directory, "daily")

    def meta_path(self) -> str:
        """Return the path of the dataset's ``meta.json`` file."""
        directory = self.base_dir()
        return os.path.join(directory, "meta.json")


def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
    """Load the cached daily table for *dataset*, if one exists.

    The parquet file is tried first, then the pickle file. A candidate that
    exists but cannot be read (corrupt file, missing parquet engine, ...) no
    longer hides a readable sibling in the other format: the next candidate
    is tried instead.

    Args:
        dataset: Key identifying the cached dataset.

    Returns:
        * the cached frame when a candidate file was read successfully;
        * an empty ``DataFrame`` when at least one candidate file exists but
          none could be read;
        * ``None`` when no candidate file exists at all.
    """
    base = dataset.daily_table_base()
    read_failed = False
    for candidate in (base + ".parquet", base + ".pkl"):
        if not os.path.exists(candidate):
            continue
        try:
            df = read_table(candidate)
        except Exception:
            # Best effort: remember the failure and fall through to the
            # other format rather than giving up on the first bad file.
            read_failed = True
            continue
        return df if isinstance(df, pd.DataFrame) else pd.DataFrame()
    # A file was present but unreadable -> empty frame (as before);
    # nothing on disk at all -> None.
    return pd.DataFrame() if read_failed else None


def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
    """Store *daily* as the cached daily table of *dataset*.

    The dataset's cache directory is created when missing.

    Returns:
        The path of the file written by ``write_table`` (including the
        extension it chose).
    """
    target_dir = dataset.base_dir()
    os.makedirs(target_dir, exist_ok=True)
    table_base = dataset.daily_table_base()
    return write_table(daily, table_base)


def ensure_duckdb_available() -> None:
    """Raise unless DuckDB can be imported.

    Raises:
        RuntimeError: If ``duckdb`` is not importable; the message includes
            the pip command to install it.
    """
    if _has_duckdb():
        return
    message = "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
    raise RuntimeError(message)