| import os, tarfile, tempfile |
| from datetime import datetime, timezone |
| from huggingface_hub import HfApi |
|
|
def env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Name of the environment variable to read.
        default: Value used when the variable is unset, or when its text
            cannot be parsed as an integer.

    Returns:
        The parsed integer, or ``default`` unchanged when parsing fails.
    """
    try:
        # An unset variable falls back to the textual form of ``default`` so
        # that both paths go through the same ``int`` conversion.
        return int(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        # Only conversion problems mean "use the default"; any other
        # exception is a genuine error and is allowed to propagate.
        return default
|
|
def make_tar(src_dir: str, out_path: str) -> None:
    """Pack ``src_dir`` into a gzip-compressed tar archive at ``out_path``.

    The directory is stored in the archive under its own base name, so
    extracting the archive recreates a single top-level folder.

    Args:
        src_dir: Path of the directory (or file) to archive. A trailing path
            separator is tolerated.
        out_path: Path of the ``.tar.gz`` file to create or overwrite.

    Errors raised by ``tarfile`` or the OS are not caught here and propagate
    to the caller.
    """
    # basename("/a/work/") is "", which would give the archive an empty-named
    # root entry and put the files at the top level. Normalising first strips
    # the trailing separator so the root entry is always the directory name.
    root_name = os.path.basename(os.path.normpath(src_dir))
    with tarfile.open(out_path, "w:gz") as tar:
        tar.add(src_dir, arcname=root_name)
|
|
def main():
    """Upload a timestamped backup archive to a Hugging Face dataset repo.

    Behaviour is driven entirely by environment variables:

    * ``BACKUP_ENABLE`` - must be exactly ``"1"``; otherwise nothing happens.
    * ``HF_TOKEN`` / ``HUGGINGFACE_HUB_TOKEN`` - access token; the first
      non-empty one is used. Without a token the backup is skipped.
    * ``BACKUP_DATASET_REPO`` - target dataset repo id. Without it the backup
      is skipped.
    * ``BACKUP_SRC_DIR`` - directory to archive (default ``/home/user/work``).
    * ``BACKUP_KEEP_LAST`` - number of most recent archives to keep (default
      10). A value of zero or less disables pruning.

    The archive is uploaded as ``backups/work-<UTC timestamp>.tar.gz``. After
    the upload, older archives beyond the retention count are deleted, one
    commit per file.
    """
    # Backups are opt-in.
    if os.getenv("BACKUP_ENABLE", "0") != "1":
        return

    hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
    if not hf_token:
        print("HF_TOKEN 未设置,跳过备份")
        return

    repo_id = os.getenv("BACKUP_DATASET_REPO", "").strip()
    if not repo_id:
        print("BACKUP_DATASET_REPO 未设置,跳过备份")
        return

    source_dir = os.getenv("BACKUP_SRC_DIR", "/home/user/work")
    retain = env_int("BACKUP_KEEP_LAST", 10)

    hub = HfApi(token=hf_token)
    # exist_ok=True is passed so an already existing repo is accepted.
    hub.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)

    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    remote_path = f"backups/work-{stamp}.tar.gz"

    # Build the archive in a scratch directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as scratch:
        archive_path = os.path.join(scratch, f"work-{stamp}.tar.gz")
        make_tar(source_dir, archive_path)
        hub.upload_file(
            path_or_fileobj=archive_path,
            path_in_repo=remote_path,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"backup: {remote_path}",
        )
    print(f"Uploaded: {remote_path}")

    repo_files = hub.list_repo_files(repo_id=repo_id, repo_type="dataset")
    archives = [
        path
        for path in repo_files
        if path.startswith("backups/work-") and path.endswith(".tar.gz")
    ]
    # The timestamp format sorts lexicographically in chronological order,
    # so the oldest archives come first.
    archives.sort()

    if retain <= 0 or len(archives) <= retain:
        return

    surplus = len(archives) - retain
    for stale in archives[:surplus]:
        hub.delete_file(
            path_in_repo=stale,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"prune: {stale}",
        )
        print(f"Deleted old backup: {stale}")
|
|
# Run the backup only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()