"""Back up a local directory to a Hugging Face dataset repository.

The script is driven entirely by environment variables:

* ``BACKUP_ENABLE``        -- must be exactly ``"1"`` for anything to happen.
* ``HF_TOKEN`` / ``HUGGINGFACE_HUB_TOKEN`` -- access token passed to ``HfApi``.
* ``BACKUP_DATASET_REPO``  -- target dataset repo id.
* ``BACKUP_SRC_DIR``       -- directory to archive (default ``/home/user/work``).
* ``BACKUP_KEEP_LAST``     -- number of archives to keep (default ``10``;
  a value ``<= 0`` disables pruning).
"""

import os
import tarfile
import tempfile
from datetime import datetime, timezone

try:
    from huggingface_hub import HfApi
except ImportError:
    # The dependency is only needed once a backup is actually requested, so a
    # disabled (default) run must not crash just because it is not installed.
    HfApi = None


def env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int.

    Falls back to *default* when the variable is unset or is not a valid
    integer literal.
    """
    try:
        return int(os.getenv(name, str(default)))
    except (TypeError, ValueError):
        return default


def make_tar(src_dir: str, out_path: str) -> None:
    """Write a gzip-compressed tar archive of *src_dir* to *out_path*.

    The archive contains a single top-level entry named after the last path
    component of *src_dir*.
    """
    # normpath strips a trailing separator; without it basename("/a/b/")
    # is "" and the archive would lose its top-level folder name.
    arcname = os.path.basename(os.path.normpath(src_dir))
    with tarfile.open(out_path, "w:gz") as tar:
        tar.add(src_dir, arcname=arcname)


def _upload_backup(api, dataset_repo: str, src_dir: str) -> str:
    """Archive *src_dir*, upload it to *dataset_repo*, return its repo path."""
    ts = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    backup_name = f"backups/work-{ts}.tar.gz"
    # The archive only needs to exist for the duration of the upload.
    with tempfile.TemporaryDirectory() as tmp:
        local_path = os.path.join(tmp, f"work-{ts}.tar.gz")
        make_tar(src_dir, local_path)
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=backup_name,
            repo_id=dataset_repo,
            repo_type="dataset",
            commit_message=f"backup: {backup_name}",
        )
    return backup_name


def _prune_backups(api, dataset_repo: str, keep_last: int) -> None:
    """Delete all but the newest *keep_last* archives under ``backups/``."""
    if keep_last <= 0:
        return
    files = api.list_repo_files(repo_id=dataset_repo, repo_type="dataset")
    # Names embed a fixed-width UTC timestamp (%Y%m%d-%H%M%S), so a plain
    # lexicographic sort orders them oldest -> newest.
    backups = sorted(
        f for f in files
        if f.startswith("backups/work-") and f.endswith(".tar.gz")
    )
    if len(backups) <= keep_last:
        return
    for f in backups[:-keep_last]:
        api.delete_file(
            path_in_repo=f,
            repo_id=dataset_repo,
            repo_type="dataset",
            commit_message=f"prune: {f}",
        )
        print(f"Deleted old backup: {f}")


def main() -> None:
    """Run one backup cycle: archive, upload, then prune old archives.

    Returns silently when ``BACKUP_ENABLE`` is not ``"1"``, and prints a
    message and returns when the token or the target repo is not configured.

    Raises:
        ImportError: if a backup is requested but ``huggingface_hub`` is
            not installed.
    """
    if os.getenv("BACKUP_ENABLE", "0") != "1":
        return

    token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
    if not token:
        print("HF_TOKEN 未设置,跳过备份")
        return

    dataset_repo = os.getenv("BACKUP_DATASET_REPO", "").strip()
    if not dataset_repo:
        print("BACKUP_DATASET_REPO 未设置,跳过备份")
        return

    src_dir = os.getenv("BACKUP_SRC_DIR", "/home/user/work")
    keep_last = env_int("BACKUP_KEEP_LAST", 10)

    if HfApi is None:
        raise ImportError(
            "huggingface_hub is required when BACKUP_ENABLE=1"
        )

    api = HfApi(token=token)
    api.create_repo(repo_id=dataset_repo, repo_type="dataset", exist_ok=True)

    backup_name = _upload_backup(api, dataset_repo, src_dir)
    print(f"Uploaded: {backup_name}")

    _prune_backups(api, dataset_repo, keep_last)


if __name__ == "__main__":
    main()