Spaces:
Running
Running
| """ | |
| db_sync.py — Persists the Label Studio SQLite DB across HF Space rebuilds | |
| by syncing to/from the private TrustLLMeu/saga-db-backup dataset repo. | |
| Usage: | |
| python3 db_sync.py restore # pull DB from HF repo → /data/ls/ | |
| python3 db_sync.py backup # push /data/ls/label_studio.sqlite3 → HF repo | |
| python3 db_sync.py watch # backup every INTERVAL seconds (run in background) | |
| """ | |
| import os | |
| import shutil | |
| import sys | |
| import time | |
# Hugging Face access token, read once at import time; empty string when unset.
HF_TOKEN = os.getenv("HF_TOKEN", "")

# Dataset repo that holds the backup copy of the database.
BACKUP_REPO = "TrustLLMeu/saga-db-backup"

# Local path of the Label Studio SQLite database file.
DB_PATH = "/data/ls/label_studio.sqlite3"

# Name of the backup file inside BACKUP_REPO.
REMOTE_FILE = "label_studio.sqlite3"

# Seconds between backups in watch mode (5 minutes).
INTERVAL = 5 * 60
def _api():
    """Build an ``HfApi`` client authenticated with ``HF_TOKEN``.

    Returns:
        An ``HfApi`` instance constructed with ``token=HF_TOKEN``.

    Raises:
        RuntimeError: if ``HF_TOKEN`` is empty.
    """
    # Imported inside the function, so importing this module does not by
    # itself require huggingface_hub.
    import huggingface_hub

    if HF_TOKEN:
        return huggingface_hub.HfApi(token=HF_TOKEN)
    raise RuntimeError("HF_TOKEN env var not set")
def restore():
    """Download the DB from the HF backup repo and install it at ``DB_PATH``.

    The file is first downloaded into a staging directory that sits next to
    ``DB_PATH`` and is then swapped into place with ``os.replace``, so a
    failed or interrupted download never leaves a partial file at
    ``DB_PATH``.

    Returns:
        True if the DB was restored; False if the repo has no backup file or
        if any step failed (the error is printed, not raised).
    """
    # Sibling of DB_PATH, so it is on the same filesystem and the final
    # os.replace() is a rename rather than a copy.
    staging_dir = DB_PATH + ".restore_tmp"
    try:
        api = _api()
        # Check whether the backup file exists in the repo.
        files = api.list_repo_files(BACKUP_REPO, repo_type="dataset")
        if REMOTE_FILE not in files:
            # NOTE(review): the "β" below looks like a mis-decoded dash in
            # this copy of the file; kept verbatim — confirm.
            print(f"[db_sync] No backup found in {BACKUP_REPO} β fresh start.", flush=True)
            return False
        print(f"[db_sync] Restoring DB from {BACKUP_REPO}...", flush=True)
        os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
        # Start from an empty staging dir in case an earlier run was killed
        # before it could clean up.
        shutil.rmtree(staging_dir, ignore_errors=True)
        os.makedirs(staging_dir)
        # NOTE(review): local_dir_use_symlinks is deprecated in newer
        # huggingface_hub releases — confirm against the installed version.
        path = api.hf_hub_download(
            repo_id=BACKUP_REPO,
            filename=REMOTE_FILE,
            repo_type="dataset",
            local_dir=staging_dir,
            local_dir_use_symlinks=False,
        )
        # Swap the fully downloaded file into place.
        os.replace(path, DB_PATH)
        size = os.path.getsize(DB_PATH)
        print(f"[db_sync] Restored DB ({size:,} bytes).", flush=True)
        return True
    except Exception as e:
        # Best-effort: report the failure and let the caller decide.
        print(f"[db_sync] Restore failed: {e}", flush=True)
        return False
    finally:
        # Remove the staging dir and anything the download left in it.
        shutil.rmtree(staging_dir, ignore_errors=True)
def backup():
    """Upload a consistent snapshot of the local DB to the HF backup repo.

    The database at ``DB_PATH`` may be written to while this runs, so the
    file is not uploaded directly. A snapshot is taken with SQLite's backup
    API (``sqlite3.Connection.backup``) into a temporary directory and that
    snapshot is uploaded as ``REMOTE_FILE``.

    Returns:
        True if the upload succeeded; False if there is no DB at ``DB_PATH``
        or if any step failed (the error is printed, not raised).
    """
    # Function-scope imports keep this block self-contained.
    import contextlib
    import sqlite3
    import tempfile

    if not os.path.exists(DB_PATH):
        # NOTE(review): the "β" in the messages below looks like a
        # mis-decoded dash/arrow in this copy of the file; kept verbatim.
        print(f"[db_sync] No DB at {DB_PATH} β skipping backup.", flush=True)
        return False
    try:
        api = _api()
        with tempfile.TemporaryDirectory(prefix="db_sync_") as workdir:
            snapshot = os.path.join(workdir, REMOTE_FILE)
            # closing() is needed because a sqlite3 connection used as a
            # context manager only manages transactions, it does not close.
            with contextlib.closing(sqlite3.connect(DB_PATH)) as live:
                with contextlib.closing(sqlite3.connect(snapshot)) as copy:
                    live.backup(copy)
            size = os.path.getsize(snapshot)
            print(f"[db_sync] Backing up DB ({size:,} bytes) β {BACKUP_REPO}...", flush=True)
            api.upload_file(
                path_or_fileobj=snapshot,
                path_in_repo=REMOTE_FILE,
                repo_id=BACKUP_REPO,
                repo_type="dataset",
                commit_message="Auto-backup from HF Space",
            )
        print("[db_sync] Backup complete.", flush=True)
        return True
    except Exception as e:
        # Best-effort: a failed backup is reported and must not kill watch().
        print(f"[db_sync] Backup failed: {e}", flush=True)
        return False
def watch():
    """Back up the DB forever, once every ``INTERVAL`` seconds.

    Never returns. The first backup happens only after one full interval
    has elapsed, because each cycle sleeps before it calls ``backup()``.
    """
    banner = f"[db_sync] Watch mode: backing up every {INTERVAL}s."
    print(banner, flush=True)
    while True:
        # Sleep first, then back up; the result of backup() is ignored.
        time.sleep(INTERVAL)
        backup()
if __name__ == "__main__":
    # The first CLI argument selects the mode; "backup" is used when none
    # is given.
    cmd = sys.argv[1] if len(sys.argv) > 1 else "backup"

    # One-shot commands: run once and turn the boolean result into an exit
    # status (0 on success, 1 on failure).
    one_shot = {"restore": restore, "backup": backup}

    if cmd in one_shot:
        succeeded = one_shot[cmd]()
        sys.exit(0 if succeeded else 1)

    if cmd == "watch":
        watch()
    else:
        print("Usage: db_sync.py restore|backup|watch", flush=True)
        sys.exit(1)