Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
"""
Hindsight Restore — Download pg_dump from HF Dataset and restore into running PG.
Called AFTER Hindsight starts (PostgreSQL must be running).
Uses pg_restore --clean to replace the fresh empty database with backup data.
Usage (called by entrypoint.sh):
    python3 /opt/backup/restore.py
Env vars:
    HF_TOKEN — HuggingFace token (read access)
    HF_BACKUP_REPO — Dataset repo (default: Arnwald84/atum-hindsight-backup)
"""
| import glob | |
| import os | |
| import subprocess | |
| import sys | |
| from pathlib import Path | |
# Hugging Face access: the token (empty string when unset) and the dataset
# repo that holds the dump.
HF_TOKEN = os.getenv("HF_TOKEN", "")
HF_REPO = os.getenv("HF_BACKUP_REPO", "Arnwald84/atum-hindsight-backup")

# PostgreSQL connection settings; user, password and database name all
# share the same value.
PG_USER = PG_PASSWORD = PG_DATABASE = "hindsight"
PG_PORT = "5432"  # a string, because it is passed as a command-line argument
def log(msg: str) -> None:
    """Print *msg* to stdout behind a ``[RESTORE]`` prefix and flush at once."""
    print("[RESTORE]", msg, flush=True)
def _version_sort_key(path: str) -> tuple:
    """Return a key ordering *path* numerically by its version directory.

    The version directory is the component matched by ``*`` in
    ``.../installation/*/bin/<name>``. Digit runs compare as integers, so
    ``16.10`` ranks above ``16.9``; the full path breaks ties.
    """
    import re  # local import: the file's top-level imports do not include re

    version_dir = os.path.basename(os.path.dirname(os.path.dirname(path)))
    # re.split with a capturing group alternates text / digit runs, so odd
    # positions are always digits and like types line up when keys compare.
    chunks = re.split(r"(\d+)", version_dir)
    natural = tuple(int(c) if i % 2 else c for i, c in enumerate(chunks))
    return (natural, path)


def find_pg_bin(name: str) -> str:
    """Find a PostgreSQL binary in the pg0 installation.

    Globs ``~/.pg0/installation/*/bin/<name>`` and, when several installation
    directories match, returns the one whose directory name is the highest
    when compared numerically (a plain string sort would rank ``16.9`` above
    ``16.10``).

    Args:
        name: File name of the binary, e.g. ``"pg_restore"``.

    Returns:
        Path of the matching binary.

    Raises:
        FileNotFoundError: If no installation directory contains *name*.
    """
    pattern = os.path.expanduser(f"~/.pg0/installation/*/bin/{name}")
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f"{name} not found in ~/.pg0/installation/")
    return max(matches, key=_version_sort_key)
# Exit codes returned by main().
# 0 — data was restored; the caller should restart Hindsight.
EXIT_RESTORED = 0
# 1 — the restore failed.
EXIT_ERROR = 1
# 2 — no backup was found; the restart is skipped.
EXIT_NO_BACKUP = 2
def main() -> int:
    """Download the latest pg_dump from the HF dataset and restore it.

    Returns:
        EXIT_RESTORED (0) when the dump was restored; EXIT_ERROR (1) when the
        repo could not be listed or pg_restore exited non-zero;
        EXIT_NO_BACKUP (2) when there is nothing to restore (no token,
        huggingface_hub missing, or no dump file in the repo).

    Raises:
        FileNotFoundError: If find_pg_bin cannot locate pg_restore.
        Exception: Errors raised by hf_hub_download are not caught here.
    """
    if not HF_TOKEN:
        log("HF_TOKEN not set — skipping restore")
        return EXIT_NO_BACKUP

    # Imported lazily so a missing package degrades to "no backup" instead of
    # an import failure at module load.
    try:
        from huggingface_hub import HfApi, hf_hub_download
    except ImportError:
        log("huggingface_hub not installed — skipping restore")
        return EXIT_NO_BACKUP

    snapshot_name = "snapshots/latest.pgdump"
    api = HfApi(token=HF_TOKEN)

    # Check if backup exists
    try:
        files = list(api.list_repo_files(repo_id=HF_REPO, repo_type="dataset"))
    except Exception as e:
        log(f"Cannot access repo {HF_REPO}: {e}")
        return EXIT_ERROR

    if snapshot_name not in files:
        log("No pg_dump backup found in HF Dataset — starting fresh")
        return EXIT_NO_BACKUP

    log(f"Downloading latest backup from {HF_REPO}...")
    local_path = hf_hub_download(
        repo_id=HF_REPO,
        filename=snapshot_name,
        repo_type="dataset",
        token=HF_TOKEN,
        cache_dir="/tmp/hf_cache",
    )
    size_kb = Path(local_path).stat().st_size / 1024
    log(f"Downloaded: {size_kb:.0f} KB")

    # Restore using pg_restore
    pg_restore = find_pg_bin("pg_restore")
    env = os.environ.copy()
    env["PGPASSWORD"] = PG_PASSWORD  # password handed to pg_restore via its environment

    log("Restoring database...")
    result = subprocess.run(
        [
            pg_restore,
            "-U", PG_USER,
            "-d", PG_DATABASE,
            "-p", PG_PORT,
            "--clean",
            "--if-exists",
            "--no-owner",
            "--no-acl",
            "--single-transaction",
            local_path,
        ],
        capture_output=True,
        text=True,
        env=env,
    )

    if result.returncode != 0:
        # --single-transaction implies --exit-on-error, so a non-zero exit
        # means pg_restore stopped at the first error and the transaction was
        # rolled back. Filtering stderr for "harmless" lines would report
        # success (e.g. on a connection failure) although nothing was restored.
        details = [ln.strip() for ln in result.stderr.splitlines() if ln.strip()]
        summary = "; ".join(details[-5:]) or "no stderr output"
        log(f"pg_restore failed (exit code {result.returncode}): {summary}")
        return EXIT_ERROR

    log("pg_restore completed successfully")
    log("Restore complete — Hindsight should be restarted to load restored data")
    return EXIT_RESTORED
if __name__ == "__main__":
    # Any uncaught exception from main() is logged and mapped to exit status 1.
    try:
        exit_status = main()
    except Exception as exc:
        log(f"FAILED: {exc}")
        exit_status = 1
    sys.exit(exit_status)