# Open-WebUI / sync_data.sh
# HuggingFace0920's picture
# Update sync_data.sh
# f6927a4 verified
#!/bin/sh
# Logging helper.
# Prints one line to stdout: "[YYYY-MM-DD HH:MM:SS] <message>" using the
# current local time.
# Arguments: $1 - message text
log() {
  # printf emits the message verbatim; echo may interpret backslash escapes
  # in the message depending on which /bin/sh is in use.
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}
# Required environment: both HF_TOKEN and DATASET_ID must be non-empty.
# If either one is missing, report it and stop with exit status 1.
if ! { [ -n "${HF_TOKEN}" ] && [ -n "${DATASET_ID}" ]; }; then
  log "错误: 未检测到 HF_TOKEN 或 DATASET_ID,备份功能不可用"
  exit 1
fi
# Activate the virtual environment at $HOME/venv when its activate script
# exists; otherwise only log a warning and carry on.
if [ -f "${HOME}/venv/bin/activate" ]; then
  log "激活虚拟环境..."
  # Quoted so that a HOME containing whitespace or glob characters is still
  # sourced as a single path.
  # shellcheck disable=SC1091
  . "${HOME}/venv/bin/activate"
else
  log "警告: 未找到虚拟环境,将使用系统Python"
fi
# Tunables: a non-empty value already present in the environment wins,
# otherwise the default below is assigned.
: "${SYNC_INTERVAL:=7200}"    # seconds between sync cycles (default: 2 hours)
: "${MAX_BACKUPS:=50}"        # number of backups to keep (default: 50)
: "${MAX_BACKUP_SIZE:=100}"   # backup size limit in MB (default: 100)
# Directory that gets archived by the sync loop.
STORAGE_PATH="${HOME}/app/data"
# Write the sync helper script to /app/hf_sync.py (overwriting any existing
# file). The quoted 'EOL' delimiter makes the here-document literal: no
# variable or command expansion happens inside it.
# NOTE(review): the body below is a single placeholder line standing in for
# the earlier hf_sync.py content, not Python source — restore the real script
# text here before relying on the python calls made later in this file.
cat > /app/hf_sync.py << 'EOL'
... (此处为之前的hf_sync.py内容,保持不变)
EOL
# On startup, ask hf_sync.py to download the latest backup from HuggingFace
# into $HOME/app. A failure is only logged; the script keeps going.
log "正在从 HuggingFace 下载最新备份..."
if ! python /app/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}" "${HOME}/app"; then
  log "备份恢复失败,将继续启动应用"
else
  log "备份恢复成功"
fi
# Archive STORAGE_PATH into /tmp and hand the archive to hf_sync.py for upload.
# Globals: STORAGE_PATH, HF_TOKEN, DATASET_ID, MAX_BACKUP_SIZE (read);
#          timestamp, backup_file, temp_backup, tar_status (written)
# The temporary archive is always removed before returning.
_sync_upload_backup() {
  timestamp=$(date +%Y%m%d_%H%M%S)
  backup_file="backup_${timestamp}.tar.gz"
  temp_backup="/tmp/${backup_file}"

  # -C switches to the parent directory so the archive contains only the last
  # path component of STORAGE_PATH, not the full parent path.
  log "正在创建备份..."
  tar -czf "${temp_backup}" -C "$(dirname "${STORAGE_PATH}")" "$(basename "${STORAGE_PATH}")"
  tar_status=$?

  # Status 0 is a clean archive. GNU tar uses status 1 for "files changed
  # while being read", which still leaves a usable archive, so that case is
  # uploaded as long as the archive is non-empty. Anything else is a real
  # failure: do not upload a missing or broken archive.
  if [ "${tar_status}" -eq 0 ] || { [ "${tar_status}" -eq 1 ] && [ -s "${temp_backup}" ]; }; then
    log "正在上传备份到 HuggingFace..."
    if python /app/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "${temp_backup}" "${backup_file}" "${MAX_BACKUP_SIZE}"; then
      log "备份上传成功"
    else
      log "备份上传失败"
    fi
  else
    log "备份创建失败 (tar 退出码: ${tar_status}),跳过本次上传"
  fi

  rm -f "${temp_backup}"
}

# Run hf_sync.py super_squash when no valid timestamp of a previous squash
# exists or when the recorded one is at least seven days old.
# Globals: HF_TOKEN, DATASET_ID (read); SQUASH_FLAG_FILE (read, defaults to
#          /tmp/last_squash_time); NOW, SEVEN_DAYS, LAST, DIFF (written)
# The timestamp is recorded only after super_squash succeeds, so a failed
# squash is attempted again on the next cycle instead of being skipped for a week.
_sync_squash_if_due() {
  SQUASH_FLAG_FILE="${SQUASH_FLAG_FILE:-/tmp/last_squash_time}"
  NOW=$(date +%s)
  SEVEN_DAYS=$((7*24*60*60))
  LAST=""
  if [ -f "${SQUASH_FLAG_FILE}" ]; then
    LAST=$(cat "${SQUASH_FLAG_FILE}")
  fi

  case "${LAST}" in
    '' | *[!0-9]* | 0?*)
      # Missing, empty or non-decimal content: treat it like a first run
      # rather than feeding it to shell arithmetic, which would abort the loop.
      log "首次合并历史提交..."
      ;;
    *)
      DIFF=$((NOW - LAST))
      if [ "${DIFF}" -lt "${SEVEN_DAYS}" ]; then
        log "距离上次合并未满7天,本次跳过合并历史提交。"
        return 0
      fi
      log "距离上次合并已超过7天,正在合并历史提交..."
      ;;
  esac

  if python /app/hf_sync.py super_squash "${HF_TOKEN}" "${DATASET_ID}"; then
    echo "${NOW}" > "${SQUASH_FLAG_FILE}"
  else
    log "合并历史提交失败,将在下次同步时重试"
  fi
}

# Sync loop: never returns.
# Each cycle: if STORAGE_PATH is a directory, back it up and upload it, then
# squash history when due; otherwise log that the directory is missing.
# Then sleep SYNC_INTERVAL seconds and repeat.
# Globals: STORAGE_PATH, SYNC_INTERVAL (read)
sync_data() {
  while true; do
    log "同步进程启动"
    if [ -d "${STORAGE_PATH}" ]; then
      _sync_upload_backup
      _sync_squash_if_due
    else
      log "存储目录 ${STORAGE_PATH} 不存在,等待中..."
    fi
    log "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
    sleep "${SYNC_INTERVAL}"
  done
}
# Launch the sync loop as a background job.
pid_file="/tmp/sync_process.pid"
log "启动数据同步后台进程..."
sync_data &
# Store the background job's PID in the PID file, then report the stored value.
printf '%s\n' "$!" > "${pid_file}"
log "同步进程已启动,PID: $(cat "${pid_file}")"