Spaces:
Sleeping
Sleeping
| """HF Dataset repo storage for user notebook data.""" | |
| import json | |
| import os | |
| import tempfile | |
| from huggingface_hub import HfApi, hf_hub_download | |
| from state import UserData | |
class StorageService:
    """Persist per-user notebook data as JSON files in a Hugging Face Dataset repo.

    The class is used purely as a namespace: every method is static and is
    called as ``StorageService.<method>(...)``. Each user's data is stored at
    ``data/<user_id>.json`` inside ``REPO_ID``. Authentication uses the
    ``HF_TOKEN`` environment variable.
    """

    # Hub repository that holds one JSON file per user.
    REPO_ID = "Group-1-5010/notebooklm-data"
    REPO_TYPE = "dataset"
    # Set to True after the first successful create_repo call so the Hub is
    # only asked once per process.
    _repo_ensured = False

    @staticmethod
    def _get_token() -> str:
        """Return the Hugging Face access token from the environment.

        Raises:
            RuntimeError: If ``HF_TOKEN`` is unset or empty.
        """
        token = os.environ.get("HF_TOKEN")
        if not token:
            raise RuntimeError("HF_TOKEN not found in environment. Add it as a Secret in your HF Space settings.")
        return token

    @staticmethod
    def _ensure_repo(api: "HfApi") -> None:
        """Create the dataset repo if it doesn't already exist (once per process).

        Args:
            api: Authenticated Hub client used to issue the ``create_repo`` call.
        """
        if StorageService._repo_ensured:
            return
        api.create_repo(
            repo_id=StorageService.REPO_ID,
            repo_type=StorageService.REPO_TYPE,
            private=True,
            exist_ok=True,
        )
        # Only flip the flag after create_repo returned without raising, so a
        # failed attempt is retried on the next save.
        StorageService._repo_ensured = True
        print(f"[StorageService] Ensured dataset repo exists: {StorageService.REPO_ID}", flush=True)

    @staticmethod
    def save_user_data(user_data: "UserData") -> None:
        """Serialize ``user_data`` to JSON and upload it to the dataset repo.

        The JSON is written to a temporary file, uploaded as
        ``data/<user_id>.json``, and the temporary file is removed afterwards
        whether or not the upload succeeded.

        Args:
            user_data: Object exposing ``to_dict()`` and ``user_id``.

        Raises:
            RuntimeError: If ``HF_TOKEN`` is not configured.
            Exception: Any error from serialization or the Hub calls is
                propagated so the caller can show it to the user.
        """
        token = StorageService._get_token()
        data_json = json.dumps(user_data.to_dict(), ensure_ascii=False, indent=2)

        tmp_path = None
        try:
            # ensure_ascii=False can emit non-ASCII characters, so the file is
            # written explicitly as UTF-8 instead of the locale default.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=".json", delete=False, encoding="utf-8"
            ) as tmp:
                # Record the path before writing so a failed write still gets
                # cleaned up in the finally block.
                tmp_path = tmp.name
                tmp.write(data_json)

            api = HfApi(token=token)
            StorageService._ensure_repo(api)
            api.upload_file(
                path_or_fileobj=tmp_path,
                path_in_repo=f"data/{user_data.user_id}.json",
                repo_id=StorageService.REPO_ID,
                repo_type=StorageService.REPO_TYPE,
            )
            print(f"[StorageService] Saved user data for '{user_data.user_id}'", flush=True)
        finally:
            if tmp_path:
                try:
                    os.unlink(tmp_path)
                except OSError as cleanup_error:
                    # Best-effort cleanup: never mask the upload result (or
                    # its exception) with a temp-file removal failure.
                    print(
                        f"[StorageService] Could not remove temp file '{tmp_path}': {cleanup_error}",
                        flush=True,
                    )

    @staticmethod
    def load_user_data(user_id: str, user_name: str) -> "UserData | None":
        """Download and deserialize a user's data from the dataset repo.

        Args:
            user_id: Identifier used to build the remote path
                ``data/<user_id>.json``.
            user_name: Not used by this method; kept so existing callers
                continue to work.

        Returns:
            The ``UserData`` built by ``UserData.from_dict``, or ``None`` when
            ``HF_TOKEN`` is not set or when downloading/parsing fails for any
            reason (the caller then treats the user as new).
        """
        try:
            token = StorageService._get_token()
        except RuntimeError:
            print("[StorageService] HF_TOKEN not set — skipping load", flush=True)
            return None

        try:
            path = hf_hub_download(
                repo_id=StorageService.REPO_ID,
                filename=f"data/{user_id}.json",
                repo_type=StorageService.REPO_TYPE,
                token=token,
            )
            # The file is written as UTF-8 JSON by save_user_data; read it the
            # same way regardless of the platform's default encoding.
            with open(path, "r", encoding="utf-8") as f:
                data = json.load(f)
            print(f"[StorageService] Loaded user data for '{user_id}'", flush=True)
            return UserData.from_dict(data)
        except Exception as e:
            # EntryNotFoundError or network errors — treat as new user.
            # NOTE(review): a transient network failure or a corrupt file is
            # indistinguishable from "no data" here; a later save would then
            # overwrite the stored file. Confirm callers accept that risk.
            print(f"[StorageService] No existing data for '{user_id}': {e}", flush=True)
            return None