"""HF Dataset repo storage for user notebook data.""" import json import os import tempfile from huggingface_hub import HfApi, hf_hub_download from state import UserData class StorageService: REPO_ID = "Group-1-5010/notebooklm-data" REPO_TYPE = "dataset" _repo_ensured = False @staticmethod def _get_token(): token = os.environ.get("HF_TOKEN") if not token: raise RuntimeError("HF_TOKEN not found in environment. Add it as a Secret in your HF Space settings.") return token @staticmethod def _ensure_repo(api: HfApi): """Create the dataset repo if it doesn't already exist (once per process).""" if StorageService._repo_ensured: return api.create_repo( repo_id=StorageService.REPO_ID, repo_type=StorageService.REPO_TYPE, private=True, exist_ok=True, ) StorageService._repo_ensured = True print(f"[StorageService] Ensured dataset repo exists: {StorageService.REPO_ID}", flush=True) @staticmethod def save_user_data(user_data: UserData) -> None: """Serialize UserData to JSON and upload to HF Dataset repo. Raises on failure so the caller can show the error to the user. """ token = StorageService._get_token() data_json = json.dumps(user_data.to_dict(), ensure_ascii=False, indent=2) tmp_path = None try: with tempfile.NamedTemporaryFile( mode="w", suffix=".json", delete=False ) as tmp: tmp.write(data_json) tmp_path = tmp.name api = HfApi(token=token) StorageService._ensure_repo(api) api.upload_file( path_or_fileobj=tmp_path, path_in_repo=f"data/{user_data.user_id}.json", repo_id=StorageService.REPO_ID, repo_type=StorageService.REPO_TYPE, ) print(f"[StorageService] Saved user data for '{user_data.user_id}'", flush=True) finally: if tmp_path: try: os.unlink(tmp_path) except Exception: pass @staticmethod def load_user_data(user_id: str, user_name: str) -> UserData | None: """Download and deserialize UserData from HF Dataset repo. Returns None if the file doesn't exist (new user). """ try: token = StorageService._get_token() except RuntimeError: print("[StorageService] HF_TOKEN not set — skipping load", flush=True) return None try: path = hf_hub_download( repo_id=StorageService.REPO_ID, filename=f"data/{user_id}.json", repo_type=StorageService.REPO_TYPE, token=token, ) with open(path, "r") as f: data = json.load(f) print(f"[StorageService] Loaded user data for '{user_id}'", flush=True) return UserData.from_dict(data) except Exception as e: # EntryNotFoundError or network errors — treat as new user print(f"[StorageService] No existing data for '{user_id}': {e}", flush=True) return None