cc / tools /hf-backup.sh
hequ's picture
Update tools/hf-backup.sh
7e811cb verified
#!/bin/sh
# shellcheck shell=sh
set -e
# Print the current local time on stdout, formatted as "YYYY-MM-DD HH:MM:SS".
TS() {
  date '+%Y-%m-%d %H:%M:%S'
}
# Emit one log line of the form "[<timestamp>] [hf-backup] <message>".
# All arguments are joined into the message. Output goes to stderr so that
# log lines never pollute values captured through command substitution.
log() {
  echo "[$(TS)] [hf-backup] $*"
} >&2
# Hugging Face access token and target dataset id; both default to empty.
: "${HF_TOKEN:=}"
: "${DATASET_ID:=}"

# Backup settings. Relative paths are preferred: they pair with tar's "-C /".
: "${SYNC_INTERVAL:=3600}"
: "${MAX_BACKUPS:=10}"
: "${BACKUP_PREFIX:=crs_backup}"

# Suggested default: the data directory plus (if present) the redis data
# directory. Comma-separated.
: "${BACKUP_PATHS:=app/data,app/data/redis}"

# Comma-separated exclude patterns. For public or semi-public datasets,
# sensitive files can be excluded more aggressively.
: "${BACKUP_EXCLUDE:=app/data/init.json,app/data/users.json,*.tmp,*.cache,**/.DS_Store}"

# Python helper invoked for upload/restore, and the scratch directory that
# holds archives before upload.
HF_PY="/app/tools/hf_backup.py"
TMP_DIR="/tmp/crs_backup"
mkdir -p "$TMP_DIR"
# Guard for commands that talk to Hugging Face: succeeds when both HF_TOKEN
# and DATASET_ID are non-empty; otherwise logs a message and terminates the
# shell with exit status 1.
need_hf() {
  [ -n "$HF_TOKEN" ] && [ -n "$DATASET_ID" ] && return 0
  log "HF_TOKEN/DATASET_ID 缺失,无法使用备份功能"
  exit 1
}
# Create a gzip-compressed tar archive of the configured backup paths.
#
# Globals read:
#   TMP_DIR, BACKUP_PREFIX  archive is ${TMP_DIR}/${BACKUP_PREFIX}_<timestamp>.tar.gz
#   BACKUP_EXCLUDE          comma-separated patterns passed to tar --exclude
#   BACKUP_PATHS            comma-separated paths to archive
# One leading "/" is stripped from every entry, and tar runs with "-C /", so
# archive members are stored relative to the filesystem root.
# Paths that do not exist are skipped with a log line.
#
# Outputs: the archive path as the only line on stdout (logs go via log()).
# Returns: 0 on success; non-zero when the scratch directory cannot be
#          created, no backup path exists, or tar fails (tar's status).
mk_archive() {
  ts="$(date +%Y%m%d_%H%M%S)"
  archive="${TMP_DIR}/${BACKUP_PREFIX}_${ts}.tar.gz"

  # The scratch directory may have been removed since startup.
  if ! mkdir -p "$TMP_DIR"; then
    log "无法创建临时目录:$TMP_DIR"
    return 1
  fi

  # Disable pathname expansion while the comma-separated lists are split.
  # Otherwise an entry such as "*.tmp" would be expanded by the shell against
  # the current directory instead of reaching tar verbatim.
  case "$-" in
    *f*) restore_glob=0 ;;
    *) restore_glob=1 ;;
  esac
  set -f

  # The tar argument list is accumulated in the positional parameters, which
  # keeps every entry a single word regardless of embedded spaces.
  set -- -czf "$archive"

  IFS=','
  for x in $BACKUP_EXCLUDE; do
    [ -n "$x" ] || continue
    set -- "$@" --exclude="${x#/}"
  done

  # Everything after this point is interpreted relative to "/".
  set -- "$@" -C /

  n_paths=0
  for p in $BACKUP_PATHS; do
    [ -n "$p" ] || continue
    p="${p#/}"
    # Optional directories may be absent; skip them rather than let tar fail.
    if [ ! -e "/$p" ] && [ ! -L "/$p" ]; then
      log "跳过不存在的路径:/$p"
      continue
    fi
    set -- "$@" "$p"
    n_paths=$((n_paths + 1))
  done
  unset IFS
  if [ "$restore_glob" -eq 1 ]; then
    set +f
  fi

  if [ "$n_paths" -eq 0 ]; then
    log "没有可备份的路径(BACKUP_PATHS=${BACKUP_PATHS})"
    return 1
  fi

  log "创建归档:$archive"
  tar_rc=0
  tar "$@" || tar_rc=$?
  if [ "$tar_rc" -ne 0 ]; then
    log "tar 打包失败(退出码 ${tar_rc}):$archive"
    # Do not leave a partial archive behind.
    rm -f "$archive"
    return "$tar_rc"
  fi

  printf '%s\n' "$archive"
}
# Create one archive and upload it through the Python helper.
#
# Globals read: HF_PY, HF_TOKEN, DATASET_ID, BACKUP_PREFIX, MAX_BACKUPS.
# The local archive is removed whether or not the upload succeeds.
# Returns: 0 on success; 1 if the archive could not be created; otherwise the
#          helper's non-zero exit status.
#
# Failures are checked explicitly rather than left to "set -e": when this
# function is called from a condition (as the daemon loop does), errexit is
# ignored inside it, and a failed upload would otherwise be reported as done.
#
# NOTE(review): the token is passed on the helper's command line, where it can
# be visible in process listings; confirm whether hf_backup.py can read it
# from the environment instead.
cmd_upload_once() {
  need_hf

  # Only stdout of mk_archive (the archive path) is captured here.
  archive="$(mk_archive)" || { log "创建归档失败"; return 1; }
  [ -f "$archive" ] || { log "归档不存在:$archive"; return 1; }

  log "上传:$archive"
  upload_rc=0
  python3 "$HF_PY" upload \
    --token "$HF_TOKEN" \
    --repo "$DATASET_ID" \
    --file "$archive" \
    --prefix "$BACKUP_PREFIX" \
    --max "$MAX_BACKUPS" || upload_rc=$?

  # Always clean up the local archive, even after a failed upload.
  rm -f "$archive" || true

  if [ "$upload_rc" -ne 0 ]; then
    log "上传失败(退出码 ${upload_rc})"
    return "$upload_rc"
  fi
  log "上传完成"
}
# Fetch a backup through the Python helper and unpack it onto "/".
#
# Globals read: HF_PY, HF_TOKEN, DATASET_ID, BACKUP_PREFIX.
# The helper's stdout is taken as the path of the local archive file; its
# stderr is discarded.
# Exit behaviour (this function terminates the shell in these cases):
#   - helper printed nothing: exit 0 (restore is best-effort), logging the
#     helper's exit status when it was non-zero so failures are diagnosable;
#   - reported file does not exist: exit 1;
#   - tar extraction fails: exit with tar's status.
# Returns 0 after a successful restore.
cmd_restore() {
  need_hf

  helper_rc=0
  out="$(python3 "$HF_PY" restore --token "$HF_TOKEN" --repo "$DATASET_ID" --prefix "$BACKUP_PREFIX" 2>/dev/null)" || helper_rc=$?

  if [ -z "$out" ]; then
    if [ "$helper_rc" -ne 0 ]; then
      # Still best-effort, but make a helper failure visible in the log.
      log "restore 命令无输出且退出码为 ${helper_rc}(可能没有备份或下载失败),跳过。"
    else
      log "未找到可恢复的备份,跳过。"
    fi
    exit 0
  fi

  latest_file="$out"
  if [ ! -f "$latest_file" ]; then
    log "本地文件不存在:$latest_file"
    exit 1
  fi

  log "解压:$latest_file -> /"
  tar_rc=0
  tar -xzf "$latest_file" -C / || tar_rc=$?
  if [ "$tar_rc" -ne 0 ]; then
    log "解压失败(退出码 ${tar_rc}):$latest_file"
    exit "$tar_rc"
  fi
  log "恢复完成"
}
# Run backups forever: log the cycle settings, attempt one upload, then sleep
# SYNC_INTERVAL seconds. A failed upload is logged and does not stop the loop.
# Requires HF_TOKEN and DATASET_ID (checked once up front via need_hf).
cmd_daemon() {
  need_hf
  while true; do
    log "开始周期备份(间隔 ${SYNC_INTERVAL}s,前缀 ${BACKUP_PREFIX},保留 ${MAX_BACKUPS})"
    cmd_upload_once || log "本轮备份失败(忽略,等待下次循环)"
    sleep "$SYNC_INTERVAL"
  done
}
# Entry point: select the action from the first command-line argument.
#   restore  fetch a backup and unpack it
#   once     create and upload a single backup
#   daemon   back up repeatedly in a loop
# Any other value (or none) prints usage to stderr and exits with status 1.
subcommand="${1:-}"
if [ "$subcommand" = "restore" ]; then
  cmd_restore
elif [ "$subcommand" = "once" ]; then
  cmd_upload_once
elif [ "$subcommand" = "daemon" ]; then
  cmd_daemon
else
  echo "用法: $0 {restore|once|daemon}" >&2
  exit 1
fi