Spaces:
Sleeping
Sleeping
# Strict mode: abort on any error, on use of unset variables, and on
# failures anywhere in a pipeline.
set -euo pipefail
# Split words only on newlines/tabs, never on plain spaces.
IFS=$'\n\t'
# Auto-export every subsequent variable assignment, so the embedded
# python3 heredocs below can read DATASET_ID, BACKUP_DIR, DATASET_NUM,
# etc. straight from the environment.
set -a
# log MESSAGE... — echo MESSAGE to stdout prefixed with a "[date time]" stamp.
log() {
    printf '[%s] %s\n' "$(date +'%F %T')" "$*"
}
# 1. init_backup
# Resolve DATASET_ID, the Hub dataset repo used for backups.
# Returns 0 when backup/sync is possible, 1 when it must be skipped.
# NOTE: main calls this as `if init_backup; then`, which disables errexit
# inside the function body — so the $(python3 ...) substitution failing
# does not kill the whole script; we fall through to the empty-check.
init_backup(){
    # An externally supplied DATASET_ID wins.
    if [[ -n "${DATASET_ID:-}" ]]; then
        log "📁 使用外部定义的 DATASET_ID=$DATASET_ID"
        return 0
    fi
    # Without a token we cannot talk to the Hub at all.
    if [[ -z "${HF_TOKEN:-}" ]]; then
        log "⚠️ HF_TOKEN 未设置,跳过备份"
        return 1
    fi
    # Ask the Hub who owns this token; non-zero exit / empty output = failure.
    USER_ID=$(python3 - <<'PY'
import os
import sys

from huggingface_hub import HfApi

try:
    name = HfApi(token=os.getenv("HF_TOKEN")).whoami().get("name", "")
except Exception:  # network/auth error: report failure via exit code only
    sys.exit(1)
if not name:
    sys.exit(1)
print(name)
PY
    )
    if [[ -z "$USER_ID" ]]; then
        log "⚠️ 获取 USER_ID 失败,跳过备份"
        return 1
    fi
    # Default dataset lives under the token owner's namespace.
    DATASET_ID="${USER_ID}/data"
    log "✅ 设置默认 DATASET_ID=$DATASET_ID"
    return 0
}
# 2. prep_repo
# Ensure the backup dataset repo and its "Chat-Share" branch exist.
# Uses exist_ok=True instead of the old list-then-create dance: that scan
# over list_datasets()/list_repo_refs() was race-prone (another process
# could create the repo in between) and needlessly slow; create_repo /
# create_branch are idempotent no-ops when the target already exists.
prep_repo(){
    python3 <<'PY'
import os

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo = os.environ["DATASET_ID"]
api.create_repo(repo_id=repo, repo_type="dataset", private=True,
                exist_ok=True)
api.create_branch(repo_id=repo, repo_type="dataset", branch="Chat-Share",
                  exist_ok=True)
PY
    log "✅ 数据集 & 分支就绪"
}
# 3. restore_latest
# Download the newest *.tar.gz backup from the Chat-Share branch and
# unpack it into $BACKUP_DIR. A branch with no backups is not an error.
restore_latest(){
    python3 <<'PY'
import os
import sys
import tarfile
import tempfile

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
files = api.list_repo_files(repo_id=repo, repo_type="dataset",
                            revision=branch)
# Names embed a YYYYmmdd_HHMMSS timestamp, so lexical sort == chronological.
backups = sorted(f for f in files if f.endswith(".tar.gz"))
if not backups:
    sys.exit(0)
tmpdir = tempfile.mkdtemp()
path = api.hf_hub_download(repo_id=repo, repo_type="dataset",
                           revision=branch, filename=backups[-1],
                           local_dir=tmpdir)
dest = os.getenv("BACKUP_DIR")
with tarfile.open(path) as tar:
    try:
        # filter="data" rejects path-traversal / device members in a
        # tampered archive (Python >= 3.12, backported to 3.11.4+).
        tar.extractall(dest, filter="data")
    except TypeError:
        # Older Python: no filter kwarg — extract as before.
        tar.extractall(dest)
PY
    log "✅ 恢复最新备份(如果有)"
}
# 4. do_backup
# Tar up $BACKUP_DIR into a timestamped archive, upload it to the
# Chat-Share branch, prune old archives down to $DATASET_NUM, then squash
# branch history so the repo does not grow without bound.
do_backup(){
    ts=$(date +%Y%m%d_%H%M%S)
    fname="Chat-Share_${ts}.tar.gz"
    tmp=$(mktemp -d)
    # First run may happen before the app has created the data dir.
    mkdir -p "$BACKUP_DIR"
    tar -czf "$tmp/$fname" -C "$BACKUP_DIR" .
    # Quoted heredoc: tmp/fname reach Python through the environment
    # (`set -a` exports them), so a path containing quotes can no longer
    # break the generated Python source the way $tmp/$fname interpolation did.
    python3 <<'PY'
import os

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
tmp, fname = os.environ["tmp"], os.environ["fname"]
api.upload_file(path_or_fileobj=os.path.join(tmp, fname),
                path_in_repo=fname,
                repo_id=repo, repo_type="dataset",
                revision=branch)
keep = int(os.getenv("DATASET_NUM", "10"))
files = api.list_repo_files(repo_id=repo, repo_type="dataset",
                            revision=branch)
backups = sorted(f for f in files if f.endswith(".tar.gz"))
# backups[:-0] would be an empty slice (delete nothing), so keep == 0
# ("retain none") needs an explicit branch.
stale = backups[:-keep] if keep > 0 else backups
for old in stale:
    api.delete_file(path_in_repo=old,
                    repo_id=repo, repo_type="dataset", revision=branch)
# Collapse the branch to a single commit to cap on-Hub storage.
api.super_squash_history(repo_id=repo, repo_type="dataset", branch=branch)
PY
    rm -rf "$tmp"
    log "✅ 上传备份并清理临时文件"
}
# 5. sync_loop
# Run do_backup forever, every $SYNC_INTERVAL seconds. This runs as a
# background job (see main) under `set -e`, so an unguarded do_backup
# failure (transient network error, Hub outage) would silently kill the
# loop for good — guard it and keep retrying on the next cycle.
sync_loop(){
    while true; do
        do_backup || log "⚠️ do_backup failed; retrying next cycle"
        log "⏳ 下次同步在 ${SYNC_INTERVAL}s 后"
        sleep "${SYNC_INTERVAL}"
    done
}
# Main flow: resolve config, optionally start backup/restore/sync, then
# hand the process over to the app.
main(){
    # Defaults; `set -a` exports them for the embedded Python helpers.
    BACKUP_DIR="${BACKUP_DIR:-$HOME/app/data}"
    DATASET_NUM="${DATASET_NUM:-10}"        # archives to retain on the Hub
    SYNC_INTERVAL="${SYNC_INTERVAL:-36000}" # seconds between backups (10 h)
    # Ensure the data dir exists even before the first restore/backup.
    mkdir -p "$BACKUP_DIR"
    if init_backup; then
        log "🚀 启动备份/同步流程,使用数据集:$DATASET_ID"
        prep_repo
        restore_latest
        sync_loop &    # periodic backups in the background
    else
        log "🚀 直接启动主应用,无备份/同步"
    fi
    # exec replaces the shell so the app receives signals directly.
    exec python app.py
}
main "$@"