Code / backup_once.py
gallyg's picture
Upload 11 files
632b0a7 verified
import os, tarfile, tempfile
from datetime import datetime, timezone
from huggingface_hub import HfApi
def env_int(name: str, default: int) -> int:
    """Read the environment variable *name* as an integer.

    Args:
        name: Name of the environment variable to look up.
        default: Fallback used when the variable is unset; it is also
            returned unchanged when the variable's text is not a valid
            integer (for example empty or non-numeric).

    Returns:
        The parsed integer, or ``default`` when parsing fails.
    """
    # An unset variable falls back to the textual form of ``default`` so that
    # both cases go through the same int() parsing path.
    raw = os.getenv(name, str(default))
    try:
        return int(raw)
    except ValueError:
        # Only a parse failure is expected here; anything else is a real bug
        # and should surface instead of being silently replaced by a default.
        return default
def make_tar(src_dir: str, out_path: str) -> None:
    """Write a gzip-compressed tar archive of *src_dir* to *out_path*.

    The archive members are stored under a single top-level entry named after
    the last component of ``src_dir``.

    Args:
        src_dir: Path of the directory (or file) to archive.
        out_path: Destination path of the ``.tar.gz`` file; it is opened in
            ``"w:gz"`` mode, so an existing file is overwritten.

    Raises:
        OSError: Propagated from ``tarfile`` when ``src_dir`` cannot be read
            or ``out_path`` cannot be written.
    """
    # normpath drops a trailing separator (and collapses "x/.."). Without it,
    # basename("/home/user/work/") is "" and the members would be stored
    # without the intended top-level directory name.
    top_level = os.path.basename(os.path.normpath(src_dir))
    with tarfile.open(out_path, "w:gz") as tar:
        tar.add(src_dir, arcname=top_level)
def main():
    """Run one backup cycle: archive, upload, then prune old archives.

    Configuration is read from environment variables:

    * ``BACKUP_ENABLE`` - must be exactly ``"1"``, otherwise nothing happens.
    * ``HF_TOKEN`` / ``HUGGINGFACE_HUB_TOKEN`` - token passed to ``HfApi``.
    * ``BACKUP_DATASET_REPO`` - dataset repo id that receives the archives.
    * ``BACKUP_SRC_DIR`` - directory to archive (default ``/home/user/work``).
    * ``BACKUP_KEEP_LAST`` - number of archives to retain (default 10);
      a value of zero or less disables pruning.

    When the token or the repo id is missing, a message is printed and the
    function returns without doing any work.
    """
    # Backups are opt-in: anything other than the literal "1" disables them.
    enabled = os.getenv("BACKUP_ENABLE", "0") == "1"
    if not enabled:
        return

    hub_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")
    if not hub_token:
        print("HF_TOKEN 未设置,跳过备份")
        return

    repo_id = os.getenv("BACKUP_DATASET_REPO", "").strip()
    if not repo_id:
        print("BACKUP_DATASET_REPO 未设置,跳过备份")
        return

    source_dir = os.getenv("BACKUP_SRC_DIR", "/home/user/work")
    retain = env_int("BACKUP_KEEP_LAST", 10)

    hub = HfApi(token=hub_token)
    # Every Hub call below targets the same dataset repo.
    target = {"repo_id": repo_id, "repo_type": "dataset"}
    hub.create_repo(exist_ok=True, **target)

    # UTC timestamp in a fixed-width format, so names sort chronologically.
    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
    archive_name = f"work-{stamp}.tar.gz"
    remote_path = f"backups/{archive_name}"

    # Build the archive in a scratch directory that is removed afterwards.
    with tempfile.TemporaryDirectory() as scratch:
        archive_path = os.path.join(scratch, archive_name)
        make_tar(source_dir, archive_path)
        hub.upload_file(
            path_or_fileobj=archive_path,
            path_in_repo=remote_path,
            commit_message=f"backup: {remote_path}",
            **target,
        )
        print(f"Uploaded: {remote_path}")

    # Collect the archives currently in the repo, oldest first.
    archives = [
        path
        for path in hub.list_repo_files(**target)
        if path.startswith("backups/work-") and path.endswith(".tar.gz")
    ]
    archives.sort()

    excess = len(archives) - retain
    if retain <= 0 or excess <= 0:
        return

    # Remove the oldest archives, one commit per deleted file.
    for stale in archives[:excess]:
        hub.delete_file(
            path_in_repo=stale,
            commit_message=f"prune: {stale}",
            **target,
        )
        print(f"Deleted old backup: {stale}")
# Run the backup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()