aether-hf / scripts /aether_backup.py
iiioooo1's picture
feat: initial Aether HF Space deploy (SQLite + Dataset backup + auto-update cron)
a9e8b86 verified
Raw
History Blame Contribute Delete
2.71 kB
"""Backup Aether data dir to HF Dataset as latest-backup.tar.gz.
Tar-gzips AETHER_BACKUP_DIR contents into a temp file and uploads to the
Dataset repo, replacing latest-backup.tar.gz. A timestamped copy is also
kept under history/ for the most recent ~30 backups (older are pruned).
"""
import os
import sys
import tarfile
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from huggingface_hub import HfApi
from huggingface_hub.utils import RepositoryNotFoundError
# --- configuration (read from the environment) ------------------------------
# Required settings: a missing variable raises KeyError right here.
token = os.environ["HF_TOKEN"]
repo_id = os.environ["AETHER_BACKUP_REPO"]

# Optional settings, each falling back to its default.
src_dir = Path(os.getenv("AETHER_BACKUP_DIR", "/opt/aether/data"))
keep_history = int(os.getenv("AETHER_BACKUP_KEEP_COUNT", "30"))
filename_latest = os.getenv("AETHER_BACKUP_LATEST_NAME", "latest-backup.tar.gz")

# A missing or empty source directory is not an error: report it and exit
# with status 0 without creating or uploading anything.
if not (src_dir.exists() and any(src_dir.iterdir())):
    print(f"aether-backup: src {src_dir} empty, skip", file=sys.stderr)
    raise SystemExit(0)
# --- archive, upload, prune --------------------------------------------------
api = HfApi(token=token)
# UTC timestamp in a fixed-width format, so lexicographic order of the
# history file names equals chronological order (relied on when pruning).
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
hist_path = f"history/aether-backup-{ts}.tar.gz"

tmpdir = Path(tempfile.mkdtemp(prefix="aether-backup-"))
archive_path = tmpdir / f"aether-backup-{ts}.tar.gz"
try:
    # The source directory is stored under its own base name inside the tar.
    with tarfile.open(archive_path, "w:gz") as tar:
        tar.add(src_dir, arcname=src_dir.name)
    print(f"aether-backup: created archive {archive_path} ({archive_path.stat().st_size} bytes)", file=sys.stderr)

    # Upload the same archive twice: once over the "latest" name and once as
    # a timestamped copy under history/. Each upload is its own commit.
    for path_in_repo, commit_message in (
        (filename_latest, f"backup {ts}"),
        (hist_path, f"backup history {ts}"),
    ):
        api.upload_file(
            path_or_fileobj=str(archive_path),
            path_in_repo=path_in_repo,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=commit_message,
        )
    print(f"aether-backup: uploaded {filename_latest} and {hist_path}", file=sys.stderr)
finally:
    # Always remove the scratch archive and directory, including when the
    # archive step or an upload raised. Cleanup itself stays best-effort,
    # but a failure is reported instead of being silently dropped.
    try:
        archive_path.unlink(missing_ok=True)
        tmpdir.rmdir()
    except OSError as exc:
        print(f"aether-backup: cleanup of {tmpdir} failed: {exc}", file=sys.stderr)

# Prune old history. Only a missing repo is tolerated while listing; any
# other listing error propagates as before.
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
except RepositoryNotFoundError as exc:
    print(f"aether-backup: prune skipped, repo {repo_id} not found: {exc}", file=sys.stderr)
    files = []

history_files = sorted(f for f in files if f.startswith("history/aether-backup-"))
if keep_history <= 0:
    # A non-positive keep count disables pruning. Slicing with
    # [:-keep_history] would keep everything for 0 only by accident and, for
    # a negative count, would delete the oldest abs(keep_history) archives.
    print(f"aether-backup: keep count {keep_history} is not positive, prune skipped", file=sys.stderr)
    overflow = []
else:
    # Everything before the newest `keep_history` entries is surplus; the
    # max() guard yields an empty slice when there are fewer files than that.
    overflow = history_files[: max(len(history_files) - keep_history, 0)]

for old in overflow:
    # Deleting is best-effort per file: one failure must not stop the rest.
    try:
        api.delete_file(
            path_in_repo=old,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"prune {old}",
        )
        print(f"aether-backup: pruned {old}", file=sys.stderr)
    except Exception as exc:
        print(f"aether-backup: prune {old} failed: {exc}", file=sys.stderr)