#!/usr/bin/env python3
"""
Hindsight Restore — Download pg_dump from HF Dataset and restore into running PG.
Called AFTER Hindsight starts (PostgreSQL must be running).
Uses pg_restore --clean to replace the fresh empty database with backup data.
Usage (called by entrypoint.sh):
python3 /opt/backup/restore.py
Env vars:
HF_TOKEN — HuggingFace token (read access)
HF_BACKUP_REPO — Dataset repo (default: Arnwald84/atum-hindsight-backup)
"""
import glob
import os
import subprocess
import sys
from pathlib import Path
# --- HuggingFace backup source -------------------------------------------
# Access token read from the environment; main() skips the restore when
# this is empty.
HF_TOKEN = os.getenv("HF_TOKEN", "")
# Dataset repository that holds the dump; overridable via HF_BACKUP_REPO.
HF_REPO = os.getenv("HF_BACKUP_REPO", "Arnwald84/atum-hindsight-backup")

# --- Target PostgreSQL instance ------------------------------------------
# Passed to pg_restore as -U / PGPASSWORD / -d / -p. The port is kept as a
# string because it goes straight into an argv list.
PG_USER = "hindsight"
PG_PASSWORD = "hindsight"
PG_DATABASE = "hindsight"
PG_PORT = "5432"
def log(msg: str) -> None:
    """Print *msg* to stdout with the ``[RESTORE]`` prefix.

    The stream is flushed on every call so the line is emitted immediately
    rather than sitting in the stdout buffer.

    Args:
        msg: Text to print after the prefix.
    """
    print(f"[RESTORE] {msg}", flush=True)
def find_pg_bin(name: str) -> str:
    """Find a PostgreSQL binary in the pg0 installation.

    Searches ``~/.pg0/installation/*/bin/<name>``. When several installation
    directories match, the one whose directory name is highest under a
    numeric-aware ordering wins, so ``16.10`` beats ``16.9`` and ``16.2``
    beats ``9.6`` (a plain string sort gets both of those wrong).

    NOTE(review): this assumes the wildcard directory is a version-like
    name — confirm against the pg0 layout.

    Args:
        name: Binary file name, e.g. ``"pg_restore"``.

    Returns:
        Path to the matching binary from the highest-ranked installation.

    Raises:
        FileNotFoundError: If no installation contains *name*.
    """
    import re  # local import: the module header does not import re

    pattern = os.path.expanduser(f"~/.pg0/installation/*/bin/{name}")

    def version_key(path: str) -> tuple:
        # Path shape is .../installation/<dir>/bin/<name>; rank by <dir>.
        install_dir = os.path.basename(os.path.dirname(os.path.dirname(path)))
        # re.split with a capturing group puts digit runs at odd indices.
        # Tagging each chunk keeps ints and strings from being compared.
        chunks = re.split(r"(\d+)", install_dir, flags=re.ASCII)
        ranked = [
            (0, int(chunk), "") if index % 2 else (1, 0, chunk)
            for index, chunk in enumerate(chunks)
        ]
        # The path itself breaks ties (e.g. "16" vs "016") deterministically.
        return (ranked, path)

    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f"{name} not found in ~/.pg0/installation/")
    return max(matches, key=version_key)
# Process exit codes returned by main().
# 0: data was restored — caller should restart Hindsight.
EXIT_RESTORED: int = 0
# 1: restore failed.
EXIT_ERROR: int = 1
# 2: no backup found — skip restart.
EXIT_NO_BACKUP: int = 2
def main() -> int:
    """Download the latest pg_dump from the HF dataset and restore it.

    Steps: check that a token and ``huggingface_hub`` are available, look
    for ``snapshots/latest.pgdump`` in the dataset repo, download it, then
    run ``pg_restore --clean --if-exists --single-transaction`` against the
    configured database.

    Returns:
        EXIT_RESTORED (0) when pg_restore exited with status 0.
        EXIT_ERROR (1) when the repo cannot be listed or pg_restore exits
        non-zero.
        EXIT_NO_BACKUP (2) when HF_TOKEN is unset, ``huggingface_hub`` is
        not importable, or the repo has no ``snapshots/latest.pgdump``.

    Raises:
        FileNotFoundError: Propagated from find_pg_bin when pg_restore is
            not installed.
        Exception: Download errors from hf_hub_download are not caught
            here; the script entry point turns them into exit code 1.
    """
    if not HF_TOKEN:
        log("HF_TOKEN not set — skipping restore")
        return EXIT_NO_BACKUP

    try:
        from huggingface_hub import HfApi, hf_hub_download
    except ImportError:
        log("huggingface_hub not installed — skipping restore")
        return EXIT_NO_BACKUP

    api = HfApi(token=HF_TOKEN)

    # Check if backup exists
    try:
        files = list(api.list_repo_files(repo_id=HF_REPO, repo_type="dataset"))
    except Exception as e:
        log(f"Cannot access repo {HF_REPO}: {e}")
        return EXIT_ERROR

    if "snapshots/latest.pgdump" not in files:
        log("No pg_dump backup found in HF Dataset — starting fresh")
        return EXIT_NO_BACKUP

    log(f"Downloading latest backup from {HF_REPO}...")
    local_path = hf_hub_download(
        repo_id=HF_REPO,
        filename="snapshots/latest.pgdump",
        repo_type="dataset",
        token=HF_TOKEN,
        cache_dir="/tmp/hf_cache",
    )
    size_kb = Path(local_path).stat().st_size / 1024
    log(f"Downloaded: {size_kb:.0f} KB")

    # Restore using pg_restore
    pg_restore = find_pg_bin("pg_restore")
    env = os.environ.copy()
    env["PGPASSWORD"] = PG_PASSWORD  # avoids an interactive password prompt

    log("Restoring database...")
    result = subprocess.run(
        [
            pg_restore,
            "-U", PG_USER,
            "-d", PG_DATABASE,
            "-p", PG_PORT,
            "--clean",
            "--if-exists",
            "--no-owner",
            "--no-acl",
            "--single-transaction",
            local_path,
        ],
        capture_output=True,
        text=True,
        env=env,
    )

    if result.returncode != 0:
        # --single-transaction implies --exit-on-error, so a non-zero exit
        # means the transaction was rolled back and nothing was restored.
        # Connection and authentication failures also land here. Filtering
        # stderr for "harmless" lines would report those as success, so the
        # exit status alone decides.
        stderr_lines = [
            line.strip() for line in result.stderr.splitlines() if line.strip()
        ]
        detail = "; ".join(stderr_lines[:5]) or f"exit code {result.returncode}"
        log(f"pg_restore had errors: {detail}")
        return EXIT_ERROR

    log("pg_restore completed successfully")
    log("Restore complete — Hindsight should be restarted to load restored data")
    return EXIT_RESTORED
if __name__ == "__main__":
    # Script entry point: run the restore and hand its result to the caller
    # as the process exit status.
    try:
        code = main()
    except Exception as e:
        # Top-level boundary: any uncaught failure becomes a logged error
        # exit instead of a traceback.
        log(f"FAILED: {e}")
        code = EXIT_ERROR
    sys.exit(code)
|