"""Backup Aether data dir to HF Dataset as latest-backup.tar.gz. Tar-gzips AETHER_BACKUP_DIR contents into a temp file and uploads to the Dataset repo, replacing latest-backup.tar.gz. A timestamped copy is also kept under history/ for the most recent ~30 backups (older are pruned). """ import os import sys import tarfile import tempfile from datetime import datetime, timezone from pathlib import Path from huggingface_hub import HfApi from huggingface_hub.utils import RepositoryNotFoundError token = os.environ["HF_TOKEN"] repo_id = os.environ["AETHER_BACKUP_REPO"] src_dir = Path(os.environ.get("AETHER_BACKUP_DIR", "/opt/aether/data")) keep_history = int(os.environ.get("AETHER_BACKUP_KEEP_COUNT", "30")) filename_latest = os.environ.get("AETHER_BACKUP_LATEST_NAME", "latest-backup.tar.gz") if not src_dir.exists() or not any(src_dir.iterdir()): print(f"aether-backup: src {src_dir} empty, skip", file=sys.stderr) sys.exit(0) api = HfApi(token=token) ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ") tmpdir = Path(tempfile.mkdtemp(prefix="aether-backup-")) archive_path = tmpdir / f"aether-backup-{ts}.tar.gz" with tarfile.open(archive_path, "w:gz") as tar: tar.add(src_dir, arcname=src_dir.name) print(f"aether-backup: created archive {archive_path} ({archive_path.stat().st_size} bytes)", file=sys.stderr) # upload as latest api.upload_file( path_or_fileobj=str(archive_path), path_in_repo=filename_latest, repo_id=repo_id, repo_type="dataset", commit_message=f"backup {ts}", ) # also keep timestamped history hist_path = f"history/aether-backup-{ts}.tar.gz" api.upload_file( path_or_fileobj=str(archive_path), path_in_repo=hist_path, repo_id=repo_id, repo_type="dataset", commit_message=f"backup history {ts}", ) print(f"aether-backup: uploaded {filename_latest} and {hist_path}", file=sys.stderr) # prune old history try: files = api.list_repo_files(repo_id=repo_id, repo_type="dataset") history_files = sorted(f for f in files if f.startswith("history/aether-backup-")) overflow = history_files[:-keep_history] if len(history_files) > keep_history else [] for old in overflow: try: api.delete_file( path_in_repo=old, repo_id=repo_id, repo_type="dataset", commit_message=f"prune {old}", ) print(f"aether-backup: pruned {old}", file=sys.stderr) except Exception as exc: print(f"aether-backup: prune {old} failed: {exc}", file=sys.stderr) except RepositoryNotFoundError: pass # cleanup try: archive_path.unlink(missing_ok=True) tmpdir.rmdir() except Exception: pass