"""Thread-safe expense ledger with HuggingFace Hub CSV persistence.""" import os import logging import threading import pandas as pd from pathlib import Path logger = logging.getLogger(__name__) COLUMNS = ["Date", "Description", "Category", "Amount"] CSV_NAME = "ledger.csv" CACHE_PATH = Path("/tmp/finance_ledger.csv") class Ledger: def __init__(self): self._lock = threading.RLock() self.token = os.getenv("HF_TOKEN") self.repo = os.getenv("HF_LEDGER_REPO") self.enabled = bool(self.token and self.repo) self.df = self._load() # ── persistence ────────────────────────────────────────────────────────── def _load(self) -> pd.DataFrame: if self.enabled: try: self._ensure_repo() from huggingface_hub import hf_hub_download path = hf_hub_download( self.repo, CSV_NAME, repo_type="dataset", token=self.token, local_dir="/tmp", ) return self._read_csv(path) except Exception as e: logger.warning(f"HF load failed ({e}), falling back to local cache") if CACHE_PATH.exists(): try: return self._read_csv(CACHE_PATH) except Exception as e: logger.warning(f"Local cache load failed: {e}") return pd.DataFrame(columns=COLUMNS) def _read_csv(self, path) -> pd.DataFrame: df = pd.read_csv(path) df["Date"] = pd.to_datetime(df["Date"]) df["Amount"] = pd.to_numeric(df["Amount"]) return df.sort_values("Date", ascending=False).reset_index(drop=True) def _ensure_repo(self): from huggingface_hub import repo_exists, create_repo if not repo_exists(self.repo, repo_type="dataset", token=self.token): create_repo(self.repo, repo_type="dataset", private=True, token=self.token, exist_ok=True) def _persist(self): df_copy = self.df.copy() if not df_copy.empty: df_copy["Date"] = df_copy["Date"].dt.strftime("%Y-%m-%d") df_copy.to_csv(CACHE_PATH, index=False) if not self.enabled: return try: from huggingface_hub import upload_file upload_file( path_or_fileobj=str(CACHE_PATH), path_in_repo=CSV_NAME, repo_id=self.repo, repo_type="dataset", token=self.token, commit_message="ledger update", ) except Exception as e: logger.error(f"HF upload failed: {e}") # ── mutations ───────────────────────────────────────────────────────────── def add(self, date: str, description: str, category: str, amount: float) -> bool: with self._lock: try: row = pd.DataFrame({ "Date": [pd.to_datetime(date)], "Description": [description], "Category": [category], "Amount": [float(amount)], }) self.df = pd.concat([self.df, row], ignore_index=True) self.df = self.df.sort_values("Date", ascending=False).reset_index(drop=True) self._persist() return True except Exception as e: logger.error(f"add failed: {e}") return False def delete_last(self) -> bool: with self._lock: if self.df.empty: return False self.df = self.df.iloc[1:].reset_index(drop=True) self._persist() return True # ── queries ─────────────────────────────────────────────────────────────── def total(self) -> float: return float(self.df["Amount"].sum()) if not self.df.empty else 0.0 def by_category(self) -> dict[str, float]: if self.df.empty: return {} return self.df.groupby("Category")["Amount"].sum().to_dict() def recent(self, n: int = 50) -> pd.DataFrame: df = self.df.head(n).copy() if not df.empty: df["Date"] = df["Date"].dt.strftime("%Y-%m-%d") return df @property def status(self) -> str: return f"✅ HF Hub: `{self.repo}`" if self.enabled else "⚠️ Local cache only" # ── singleton ───────────────────────────────────────────────────────────────── _instance: Ledger | None = None def get_ledger() -> Ledger: global _instance if _instance is None: _instance = Ledger() return _instance