Spaces:
Sleeping
Sleeping
| import hashlib | |
| import os | |
| from dataclasses import dataclass | |
| import pandas as pd | |
def _project_root() -> str:
    """Return the absolute path of the parent of this file's directory."""
    this_file = os.path.abspath(__file__)
    containing_dir = os.path.dirname(this_file)
    return os.path.dirname(containing_dir)
def cache_root() -> str:
    """Return the cache directory path, creating the directory if needed.

    The directory is ``<project root>/.cache/panel_app_v2``, where the
    project root comes from ``_project_root()``.
    """
    cache_dir = os.path.join(_project_root(), ".cache", "panel_app_v2")
    # exist_ok=True keeps repeated calls from failing once the directory exists.
    os.makedirs(cache_dir, exist_ok=True)
    return cache_dir
def _safe_str(value: object) -> str:
    """Convert ``value`` to a string without ever raising.

    Any falsy value (``None``, ``""``, ``0``, empty containers, ...) becomes
    the empty string. If the truthiness test or ``str()`` raises, the empty
    string is returned as well.
    """
    try:
        text = str(value) if value else ""
    except Exception:
        text = ""
    return text
def fingerprint_bytes(file_bytes: bytes, filename: str = "", extra: str = "") -> str:
    """Return a 32-character hex BLAKE2b digest of the given inputs.

    The digest (16 bytes) is fed, in order, with ``file_bytes``, then the
    UTF-8 encoding of ``filename``, then the UTF-8 encoding of ``extra``.
    Empty or falsy parts are skipped.

    NOTE(review): the parts are concatenated without separators, so different
    splits of the same byte stream produce the same fingerprint. The behaviour
    is kept as-is because changing it would change every existing digest.
    """
    digest = hashlib.blake2b(digest_size=16)
    if file_bytes:
        digest.update(file_bytes)
    for label in (filename, extra):
        text = _safe_str(label)
        if text:
            # errors="ignore" drops characters that cannot be encoded
            # instead of raising.
            digest.update(text.encode("utf-8", errors="ignore"))
    return digest.hexdigest()
def _has_pyarrow() -> bool:
    """Report whether ``pyarrow`` can be imported in this environment."""
    try:
        import pyarrow  # noqa: F401
    except Exception:
        # Any import-time failure, not only ImportError, counts as unavailable.
        available = False
    else:
        available = True
    return available
def _has_duckdb() -> bool:
    """Report whether ``duckdb`` can be imported in this environment."""
    try:
        import duckdb  # noqa: F401
    except Exception:
        # Any import-time failure, not only ImportError, counts as unavailable.
        available = False
    else:
        available = True
    return available
def write_table(df: pd.DataFrame, path_no_ext: str) -> str:
    """Write ``df`` to disk and return the full path that was written.

    Parquet (``<path_no_ext>.parquet``, index not stored) is used when
    pyarrow is importable; otherwise the frame is pickled to
    ``<path_no_ext>.pkl``.
    """
    if not _has_pyarrow():
        pickle_path = path_no_ext + ".pkl"
        df.to_pickle(pickle_path)
        return pickle_path

    parquet_path = path_no_ext + ".parquet"
    df.to_parquet(parquet_path, index=False)
    return parquet_path
def read_table(path: str) -> pd.DataFrame:
    """Load a table from ``path``.

    Returns an empty DataFrame when ``path`` is falsy or does not exist.
    Paths ending in ``.parquet`` (case-insensitive) are read as Parquet;
    every other path is read as a pickle.

    NOTE(review): unpickling executes arbitrary code, so only pass paths
    whose contents are trusted.
    """
    if not (path and os.path.exists(path)):
        return pd.DataFrame()

    is_parquet = str(path).lower().endswith(".parquet")
    reader = pd.read_parquet if is_parquet else pd.read_pickle
    return reader(path)
@dataclass
class CachedDataset:
    """Key for one cached dataset plus helpers that derive its cache paths.

    The class is a dataclass so that it can be constructed directly from its
    three fields, e.g. ``CachedDataset(dataset_id, rat, granularity)``.
    """

    # Name of the first directory level under the cache root.
    dataset_id: str
    # Used verbatim in the "rat=<value>" directory level.
    rat: str
    # Used verbatim in the "granularity=<value>" directory level.
    granularity: str

    def base_dir(self) -> str:
        """Return ``<cache root>/<dataset_id>/rat=<rat>/granularity=<granularity>``.

        This method does not create the dataset directory itself.
        """
        return os.path.join(
            cache_root(),
            self.dataset_id,
            f"rat={self.rat}",
            f"granularity={self.granularity}",
        )

    def daily_table_base(self) -> str:
        """Return the extension-less path of the daily table in ``base_dir()``."""
        return os.path.join(self.base_dir(), "daily")

    def meta_path(self) -> str:
        """Return the path of ``meta.json`` inside ``base_dir()``."""
        return os.path.join(self.base_dir(), "meta.json")
def try_load_cached_daily(dataset: CachedDataset) -> pd.DataFrame | None:
    """Load the cached daily table for ``dataset`` if one exists.

    Candidates are tried in order: ``<daily base>.parquet`` and then
    ``<daily base>.pkl``.

    Returns:
        * ``None`` when neither candidate file exists.
        * The loaded DataFrame from the first candidate that can be read.
        * An empty DataFrame when at least one candidate exists but none
          could be read, or when the loaded object is not a DataFrame.
    """
    base = dataset.daily_table_base()
    found_any = False
    for candidate in (base + ".parquet", base + ".pkl"):
        if not os.path.exists(candidate):
            continue
        found_any = True
        try:
            df = read_table(candidate)
        except Exception:
            # An unreadable file (e.g. a Parquet file when no Parquet engine
            # is installed) must not hide a readable fallback, so keep going.
            continue
        return df if isinstance(df, pd.DataFrame) else pd.DataFrame()
    # A cache file exists but is unusable: report that as an empty frame.
    # Only report None when there is no cache file at all.
    return pd.DataFrame() if found_any else None
def save_cached_daily(dataset: CachedDataset, daily: pd.DataFrame) -> str:
    """Persist ``daily`` as the cached daily table of ``dataset``.

    The dataset directory is created if needed. The return value is the path
    reported by ``write_table``.
    """
    target_dir = dataset.base_dir()
    os.makedirs(target_dir, exist_ok=True)
    table_base = dataset.daily_table_base()
    return write_table(daily, table_base)
def ensure_duckdb_available() -> None:
    """Raise if DuckDB cannot be imported; otherwise do nothing.

    Raises:
        RuntimeError: when ``_has_duckdb()`` reports that DuckDB is missing.
    """
    if _has_duckdb():
        return
    raise RuntimeError(
        "DuckDB is not installed. Install it to enable the V2 SQL engine: python -m pip install duckdb"
    )