#!/usr/bin/env bash
set -euo pipefail
IFS=$'\n\t'
# Auto-export every subsequent variable assignment (so the python3
# heredocs below can read DATASET_ID / BACKUP_DIR / etc. via os.getenv).
set -a
# Emit a timestamped log line to stdout.
log() {
printf '[%s] %s\n' "$(date +'%F %T')" "$*"
}
# 1. init_backup
# Decide which Hugging Face dataset repo to back up to.
# Returns 0 with DATASET_ID set (auto-exported via `set -a`),
# or 1 to signal that backup/sync should be skipped entirely.
init_backup(){
# Honour an externally supplied DATASET_ID verbatim.
if [[ -n "${DATASET_ID:-}" ]]; then
log "📁 使用外部定义的 DATASET_ID=$DATASET_ID"
return 0
fi
# Without a token we cannot talk to the Hub at all.
if [[ -z "${HF_TOKEN:-}" ]]; then
log "⚠️ HF_TOKEN 未设置,跳过备份"
return 1
fi
# Resolve the Hub username for the token; empty output signals failure.
# (The original bare `except:` also caught the SystemExit raised by its
# own sys.exit(1) inside the try — narrowed to Exception and restructured.)
USER_ID=$(python3 - <<'PY'
import os
import sys
from huggingface_hub import HfApi

try:
    name = HfApi(token=os.getenv("HF_TOKEN")).whoami().get("name", "")
except Exception:  # any auth/network error means "no user"
    sys.exit(1)
if not name:
    sys.exit(1)
print(name)
PY
)
if [[ -z "$USER_ID" ]]; then
log "⚠️ 获取 USER_ID 失败,跳过备份"
return 1
fi
DATASET_ID="${USER_ID}/data"
log "✅ 设置默认 DATASET_ID=$DATASET_ID"
return 0
}
# 2. prep_repo
# Ensure the dataset repo and the "Chat-Share" branch exist (idempotent).
prep_repo(){
python3 <<'PY'
import os
from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo = os.environ["DATASET_ID"]
# exist_ok=True is race-free and avoids listing every dataset the author
# owns just to probe for existence (the old check was O(datasets) and racy).
api.create_repo(repo_id=repo, repo_type="dataset", private=True, exist_ok=True)
api.create_branch(repo_id=repo, repo_type="dataset", branch="Chat-Share",
                  exist_ok=True)
PY
log "✅ 数据集 & 分支就绪"
}
# 3. restore_latest
# Download the newest .tar.gz backup from the branch (if any) and unpack
# it into BACKUP_DIR. No-op when the branch holds no archives.
restore_latest(){
python3 <<'PY'
import os
import sys
import tarfile
import tempfile

from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
files = api.list_repo_files(repo_id=repo, repo_type="dataset", revision=branch)
# Archive names embed a sortable timestamp, so lexicographic max == newest.
backups = sorted(f for f in files if f.endswith(".tar.gz"))
if not backups:
    sys.exit(0)
dest = os.getenv("BACKUP_DIR")
os.makedirs(dest, exist_ok=True)  # first run: directory may not exist yet
# TemporaryDirectory auto-cleans the downloaded archive (old code leaked it).
with tempfile.TemporaryDirectory() as td:
    path = api.hf_hub_download(repo_id=repo, repo_type="dataset",
                               revision=branch, filename=backups[-1],
                               local_dir=td)
    with tarfile.open(path) as tar:
        try:
            # filter="data" (Python 3.12+) rejects path-traversal members.
            tar.extractall(dest, filter="data")
        except TypeError:  # older Python: no filter= parameter
            tar.extractall(dest)
PY
log "✅ 恢复最新备份(如果有)"
}
# 4. do_backup
# Tar up BACKUP_DIR, upload the archive to the "Chat-Share" branch, prune
# archives beyond DATASET_NUM, and squash branch history to cap repo size.
# Returns non-zero on failure (under `set -e` the caller's loop then exits,
# matching the original behavior) — but the temp dir is now always removed.
do_backup(){
local ts fname tmp rc=0
ts=$(date +%Y%m%d_%H%M%S)
fname="Chat-Share_${ts}.tar.gz"
tmp=$(mktemp -d)
tar -czf "$tmp/$fname" -C "$BACKUP_DIR" . || rc=$?
if [[ $rc -eq 0 ]]; then
# Pass paths via the environment instead of interpolating them into the
# Python source, so the heredoc never contains shell-expanded text.
ARCHIVE_PATH="$tmp/$fname" ARCHIVE_NAME="$fname" python3 <<'PY' || rc=$?
import os
from huggingface_hub import HfApi

api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
api.upload_file(path_or_fileobj=os.environ["ARCHIVE_PATH"],
                path_in_repo=os.environ["ARCHIVE_NAME"],
                repo_id=repo, repo_type="dataset",
                revision=branch)
# Keep only the newest DATASET_NUM archives (timestamped names sort).
keep = int(os.getenv("DATASET_NUM", "10"))
files = api.list_repo_files(repo_id=repo, repo_type="dataset", revision=branch)
backups = sorted(f for f in files if f.endswith(".tar.gz"))
for old in backups[:-keep]:
    api.delete_file(path_in_repo=old,
                    repo_id=repo, repo_type="dataset", revision=branch)
# Collapse branch history so deleted archives stop consuming quota.
api.super_squash_history(repo_id=repo, repo_type="dataset", branch=branch)
PY
fi
rm -rf "$tmp"  # runs on failure too (old code leaked $tmp under set -e)
if [[ $rc -ne 0 ]]; then
return "$rc"
fi
log "✅ 上传备份并清理临时文件"
}
# 5. sync_loop
# Run backups forever, sleeping SYNC_INTERVAL seconds between rounds.
# Intended to be backgrounded by main().
sync_loop(){
while :; do
do_backup
log "⏳ 下次同步在 ${SYNC_INTERVAL}s 后"
sleep "${SYNC_INTERVAL}"
done
}
# 主流程
# Entry point: apply defaults, optionally start the backup pipeline in the
# background, then exec the main application (replacing this shell).
main(){
BACKUP_DIR="${BACKUP_DIR:-$HOME/app/data}"
DATASET_NUM="${DATASET_NUM:-10}"
SYNC_INTERVAL="${SYNC_INTERVAL:-36000}"
# Create the data dir up front: on a fresh instance with no prior backup,
# the first `tar -C "$BACKUP_DIR"` would otherwise fail and kill the
# background sync loop.
mkdir -p "$BACKUP_DIR"
if init_backup; then
log "🚀 启动备份/同步流程,使用数据集:$DATASET_ID"
prep_repo
restore_latest
sync_loop & # background sync loop
else
log "🚀 直接启动主应用,无备份/同步"
fi
exec python app.py
}
main