import hashlib
import os
from dataclasses import dataclass
import pandas as pd
def _project_root() -> str:
    """Absolute path of the project root (two directory levels above this file)."""
    here = os.path.abspath(__file__)
    return os.path.dirname(os.path.dirname(here))
def cache_root() -> str:
    """Return the app's cache directory, creating it if needed.

    Resolution order: the CACHE_DIR environment variable first, then a
    writable /tmp (Hugging Face Spaces / Linux containers), and finally
    <project root>/.cache for local development.
    """
    env_cache = os.environ.get("CACHE_DIR")
    if env_cache:
        target = os.path.join(env_cache, "panel_app_v2")
    elif os.path.exists("/tmp") and os.access("/tmp", os.W_OK):
        # /tmp is reliably writable on Spaces and most Linux containers.
        target = os.path.join("/tmp", "panel_app_v2_cache")
    else:
        # Local-dev fallback inside the repository.
        target = os.path.join(_project_root(), ".cache", "panel_app_v2")
    os.makedirs(target, exist_ok=True)
    return target
def _safe_str(value: object) -> str:
    """Coerce *value* to str, mapping every falsy value (None, 0, "") to ""."""
    if not value:
        return ""
    try:
        return str(value)
    except Exception:
        # Defensive: a broken __str__ must not break fingerprinting.
        return ""
def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
    """Return a stable 32-hex-char fingerprint of an uploaded file.

    Every field is hashed with an 8-byte little-endian length prefix so
    the framing is unambiguous. The previous scheme concatenated fields
    directly, so distinct inputs could collide — e.g.
    ``fingerprint_bytes(b"ab", "c") == fingerprint_bytes(b"a", "bc")`` —
    which would make the cache serve data for the wrong upload.
    Fingerprints therefore differ from the old scheme; stale cache
    entries are simply recomputed on miss.

    Parameters
    ----------
    file_bytes : raw file content; None/empty is treated as ``b""``.
    filename   : optional file name mixed into the key.
    extra      : optional caller-supplied discriminator (e.g. options).
    """
    h = hashlib.blake2b(digest_size=16)
    fields = (
        file_bytes or b"",
        str(filename or "").encode("utf-8", errors="ignore"),
        str(extra or "").encode("utf-8", errors="ignore"),
    )
    for field in fields:
        # Length prefix prevents cross-field concatenation collisions.
        h.update(len(field).to_bytes(8, "little"))
        h.update(field)
    return h.hexdigest()
def _has_pyarrow() -> bool:
    """True when pyarrow can be imported (enables Parquet serialization)."""
    try:
        import pyarrow  # noqa: F401
    except Exception:
        return False
    return True
def _has_duckdb() -> bool:
    """True when duckdb can be imported (enables the SQL engine)."""
    try:
        import duckdb  # noqa: F401
    except Exception:
        return False
    return True
def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
if _has_pyarrow():
path = path_no_ext + ".parquet"
df.to_parquet(path, index=False)
return path
path = path_no_ext + ".pkl"
df.to_pickle(path)
return path
def read_table(path: str) -> pd.DataFrame:
    """Load a table written by write_table.

    Dispatches on the file extension (.parquet vs. pickle); returns an
    empty DataFrame when *path* is falsy or does not exist.
    """
    if not path:
        return pd.DataFrame()
    if not os.path.exists(path):
        return pd.DataFrame()
    if str(path).lower().endswith(".parquet"):
        return pd.read_parquet(path)
    return pd.read_pickle(path)
@dataclass(frozen=True)
class CachedDataset:
    """Identifies one cached dataset partition on disk.

    Paths use Hive-style key=value directory names under cache_root().
    """

    dataset_id: str  # logical dataset identifier (top-level directory)
    rat: str  # radio access technology partition key
    granularity: str  # time-granularity partition key

    def base_dir(self) -> str:
        """Directory that holds every file of this partition."""
        partition = (
            self.dataset_id,
            f"rat={self.rat}",
            f"granularity={self.granularity}",
        )
        return os.path.join(cache_root(), *partition)

    def daily_table_base(self) -> str:
        """Extension-less base path of the daily table file."""
        return os.path.join(self.base_dir(), "daily")

    def meta_path(self) -> str:
        """Path of this partition's metadata JSON file."""
        return os.path.join(self.base_dir(), "meta.json")
def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
    """Return the cached daily table for *dataset* if one exists.

    Returns None when no cache file is present (parquet or pickle); on
    a read failure returns an empty DataFrame so callers can recompute.
    """
    base = dataset.daily_table_base()
    for ext in (".parquet", ".pkl"):
        candidate = base + ext
        if not os.path.exists(candidate):
            continue
        try:
            loaded = read_table(candidate)
        except Exception:
            # Corrupt/unreadable cache: behave as "cached but empty".
            return pd.DataFrame()
        return loaded if isinstance(loaded, pd.DataFrame) else pd.DataFrame()
    return None
def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
    """Write *daily* into the dataset's cache directory; return the written path."""
    target_dir = dataset.base_dir()
    os.makedirs(target_dir, exist_ok=True)
    return write_table(daily, dataset.daily_table_base())
def ensure_duckdb_available() -> None:
    """Raise RuntimeError with install instructions when duckdb is missing."""
    try:
        import duckdb  # noqa: F401
        return
    except Exception:
        pass
    raise RuntimeError(
        "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
    )