#!/usr/bin/env bash
#
# Helpers for keeping backups of $BACKUP_DIR in a Hugging Face dataset
# repository, on the "Chat-Share" branch.
#
# Environment read by the code below:
#   HF_TOKEN    - Hugging Face access token passed to HfApi.
#   DATASET_ID  - dataset repo id ("<owner>/<name>"); when unset,
#                 init_backup derives "<whoami name>/data".
#   BACKUP_DIR  - directory the latest archive is extracted into.
set -euo pipefail
IFS=$'\n\t'

# Auto-export every variable assigned from here on, so values such as
# DATASET_ID are visible to the embedded Python snippets via os.environ.
set -a

#######################################
# Print a timestamped message.
# Arguments: message words (joined with the first character of IFS).
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" on stdout.
#######################################
log() {
  echo "[$(date +'%F %T')] $*"
}

#######################################
# 1. init_backup
# Decide which dataset repository to use.
# Globals:   DATASET_ID (read/written), HF_TOKEN (read), USER_ID (written)
# Outputs:   progress messages via log
# Returns:   0 when DATASET_ID is set (externally or derived),
#            1 when backups must be skipped (no token / whoami failed).
#######################################
init_backup() {
  if [[ -n "${DATASET_ID:-}" ]]; then
    log "📁 使用外部定义的 DATASET_ID=$DATASET_ID"
    return 0
  fi

  if [[ -z "${HF_TOKEN:-}" ]]; then
    log "⚠️ HF_TOKEN 未设置,跳过备份"
    return 1
  fi

  # The snippet exits non-zero when the account name cannot be fetched.
  # Guard the assignment so `set -e` cannot abort the script before the
  # friendly "skip" message below is printed.
  USER_ID=$(python3 - <<'PY'
import os,sys
from huggingface_hub import HfApi
try:
    name = HfApi(token=os.getenv("HF_TOKEN")).whoami().get("name","")
    print(name) if name else sys.exit(1)
except:
    sys.exit(1)
PY
  ) || USER_ID=""

  if [[ -z "$USER_ID" ]]; then
    log "⚠️ 获取 USER_ID 失败,跳过备份"
    return 1
  fi

  DATASET_ID="${USER_ID}/data"
  log "✅ 设置默认 DATASET_ID=$DATASET_ID"
  return 0
}

#######################################
# 2. prep_repo
# Make sure the dataset repo and its "Chat-Share" branch exist.
# Globals:   HF_TOKEN, DATASET_ID (read by the Python snippet; DATASET_ID
#            must be exported or the snippet raises KeyError)
# Outputs:   a confirmation message via log
#######################################
prep_repo() {
  python3 <<'PY'
import os
from huggingface_hub import HfApi
api = HfApi(token=os.getenv("HF_TOKEN"))
repo = os.environ["DATASET_ID"]
author = repo.split("/")[0]
# Create the (private) dataset only if the owner does not have it yet.
if not any(d.id == repo for d in api.list_datasets(author=author)):
    api.create_repo(repo_id=repo, repo_type="dataset", private=True)
branch = "Chat-Share"
refs = api.list_repo_refs(repo_id=repo, repo_type="dataset").branches
if branch not in [b.name for b in refs]:
    api.create_branch(repo_id=repo, repo_type="dataset", branch=branch)
PY
  log "✅ 数据集 & 分支就绪"
}

#######################################
# 3. restore_latest
# Download the lexicographically last *.tar.gz on the "Chat-Share" branch
# and extract it into $BACKUP_DIR. Does nothing when no archive exists.
# Globals:   HF_TOKEN, DATASET_ID, BACKUP_DIR (read by the Python snippet)
# Outputs:   a confirmation message via log
#######################################
restore_latest() {
  python3 <<'PY'
import os,sys,tarfile,tempfile
from huggingface_hub import HfApi
api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
files = api.list_repo_files(repo_id=repo, repo_type="dataset", revision=branch)
backs = sorted(f for f in files if f.endswith(".tar.gz"))
if not backs:
    sys.exit(0)
# Download into a scratch directory that is removed afterwards, so the
# archive does not accumulate in the temp folder on every restore.
with tempfile.TemporaryDirectory() as td:
    path = api.hf_hub_download(repo_id=repo, repo_type="dataset",
                               revision=branch, filename=backs[-1],
                               local_dir=td)
    # NOTE(review): extractall trusts the archive's member paths; this is
    # only safe as long as the dataset holds archives this script created.
    with tarfile.open(path) as t:
        t.extractall(os.getenv("BACKUP_DIR"))
PY
  log "✅ 恢复最新备份(如果有)"
}

# NOTE(review): the reviewed text is cut off in the middle of do_backup.
# The visible remainder is kept verbatim on the next line (still collapsed)
# and must be restored from the complete file before do_backup can run.
# 4. do_backup do_backup(){ ts=$(date +%Y%m%d_%H%M%S) fname="Chat-Share_${ts}.tar.gz" tmp=$(mktemp -d) tar -czf "$tmp/$fname" -C "$BACKUP_DIR" . python3 <