Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| import json | |
| import shutil | |
| from datetime import datetime, timezone | |
| from pathlib import Path | |
| from huggingface_hub import hf_hub_download, HfApi # type: ignore | |
| from .models import EventResult, LlmConfig | |
| from .ui_log import info | |
# Version stamp written into the cache file's "meta" block by CacheStore.write()
# and compared by CacheStore.is_usable(); a cache with a different value is
# treated as unusable.
CACHE_SCHEMA_VERSION = 4

# File name of the cache inside the Hugging Face dataset repo.
HF_CACHE_PATH = "events.json"

# Local cache file location; uses the same relative name as the remote file.
PATH = HF_CACHE_PATH
class CacheStore:
    """Local JSON cache of event results, mirrored to a Hugging Face dataset.

    The cache lives in a single local file (``PATH``) and is uploaded to /
    downloaded from ``repo_id`` under the name ``HF_CACHE_PATH``.
    """

    def __init__(
        self,
        config: LlmConfig,
        ttl_hours: float,
        repo_id: str,
        hf_token: str | None = None,
    ):
        """Store the cache settings; performs no I/O.

        Args:
            config: LLM configuration; kept on the instance (not read by any
                method of this class).
            ttl_hours: Maximum age of the local cache file, in hours, for it
                to count as fresh.
            repo_id: Hugging Face dataset repository that mirrors the cache.
            hf_token: Optional Hugging Face access token. An empty string is
                treated the same as ``None`` when talking to the hub.
        """
        self.config = config
        self.ttl_hours = ttl_hours
        self.path = Path(PATH)
        self.hf_repo_id = repo_id
        self.hf_path = HF_CACHE_PATH
        self.hf_token = hf_token

    def pull_from_hf(self) -> None:
        """Download the cache file from the dataset repo over the local file.

        Errors raised by ``hf_hub_download`` or by the copy propagate to the
        caller.
        """
        info(f"Pulling cache from HF dataset: {self.hf_repo_id}/{self.hf_path}")
        downloaded = hf_hub_download(
            repo_id=self.hf_repo_id,
            filename=self.hf_path,
            repo_type="dataset",
            token=self.hf_token or None,
        )
        self.path.parent.mkdir(parents=True, exist_ok=True)
        # NOTE(review): copy2 also copies the source file's metadata (incl.
        # mtime), and is_fresh() judges age by mtime -- confirm that the
        # downloaded file's timestamp is the one the TTL should apply to.
        shutil.copy2(downloaded, self.path)

    def _push_to_hf(self) -> None:
        """Upload the local cache file to the dataset repo."""
        info(f"Pushing cache to HF dataset: {self.hf_repo_id}/{self.hf_path}")
        api = HfApi(token=self.hf_token or None)
        api.upload_file(
            path_or_fileobj=str(self.path),
            path_in_repo=self.hf_path,
            repo_id=self.hf_repo_id,
            repo_type="dataset",
            commit_message="Update robotic_seminars cache",
        )

    def is_fresh(self) -> bool:
        """Return True if the local cache file exists and is younger than the TTL.

        Age is measured from the file's modification time, not from the
        ``cached_at`` value stored inside the file.
        """
        if not self.path.exists():
            return False
        cached_at = datetime.fromtimestamp(self.path.stat().st_mtime, tz=timezone.utc)
        age_seconds = (datetime.now(timezone.utc) - cached_at).total_seconds()
        return age_seconds < self.ttl_hours * 3600

    def is_usable(self) -> bool:
        """Return True if the cache is fresh and has the current schema version.

        A cache file that cannot be read, is not valid JSON, or lacks
        ``meta.schema_version`` is reported as unusable (``False``) instead of
        raising, so callers can simply rebuild it.
        """
        if not self.is_fresh():
            return False
        try:
            payload = json.loads(self.path.read_text(encoding="utf-8"))
            version = payload["meta"]["schema_version"]
        except (OSError, ValueError, KeyError, TypeError):
            # OSError: file vanished/unreadable; ValueError: bad JSON or bad
            # UTF-8; KeyError/TypeError: unexpected payload shape.
            return False
        return version == CACHE_SCHEMA_VERSION

    def write(self, *, results: list[EventResult]) -> None:
        """Write ``results`` to the local cache file, then push it to the hub.

        The file holds a ``meta`` block (schema version, UTC write time, TTL)
        and a ``results`` list built from each result's ``model_dump()``.

        Args:
            results: Event results to persist.
        """
        self.path.parent.mkdir(parents=True, exist_ok=True)
        payload = {
            "meta": {
                "schema_version": CACHE_SCHEMA_VERSION,
                "cached_at": datetime.now(timezone.utc).isoformat(),
                "ttl_hours": self.ttl_hours,
            },
            "results": [r.model_dump() for r in results],
        }
        self.path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
        self._push_to_hf()