# omnibox / Dockerfile
# Last change: "Update Dockerfile" by wd21 (commit 4d3a9c0, verified)
# Base image that provides the Omnibox application.
# NOTE(review): ":latest" is a floating tag, so rebuilds are not reproducible —
# consider pinning a specific version tag or digest once one is known.
FROM lampon/omnibox:latest
# Install the system dependencies (Python, pip, socat) and the huggingface_hub
# client used by the backup/restore scripts.
# --no-install-recommends keeps the layer small; ca-certificates is listed
# explicitly so HTTPS access to the Hugging Face Hub does not depend on a
# recommended package being pulled in.
# --break-system-packages allows pip to install into the distro-managed
# Python environment.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        python3 \
        python3-pip \
        socat && \
    pip3 install --no-cache-dir --break-system-packages huggingface_hub && \
    rm -rf /var/lib/apt/lists/*
# Working directory for the container: start.sh launches "./main" relative to
# it, and the backup/restore scripts operate on /app/data.
WORKDIR /app
# Backup script.
# Archives /app/data as omnibox_backup/data inside a gzip tarball and uploads
# it to the Hugging Face dataset named by HF_DATASET (authenticated with
# HF_TOKEN) as omnibox_backup_<YYYYmmdd_HHMMSS>.tar.gz.
# Exits 0 without doing anything when either variable is unset. An upload
# failure is reported but, as before, does not change the exit status.
RUN cat > /usr/local/bin/backup.sh << 'EOF'
#!/bin/bash
set -e

if [ -z "$HF_DATASET" ] || [ -z "$HF_TOKEN" ]; then
    echo "❌ 缺少 HF_DATASET 或 HF_TOKEN 环境变量,跳过备份"
    exit 0
fi

echo "$(date): 📦 开始备份 omnibox 数据..."

# Use a private staging directory per run: the periodic backup and the
# shutdown backup may overlap, and a fixed path would also pick up leftovers
# from a previously aborted run.
WORK_DIR=$(mktemp -d /tmp/omnibox_backup.XXXXXX)
# Remove the staging directory on every exit path, including `set -e` aborts.
trap 'rm -rf "$WORK_DIR"' EXIT

# Keep the archive layout "omnibox_backup/data" that the restore step expects.
mkdir -p "$WORK_DIR/omnibox_backup"
if [ -d /app/data ]; then
    cp -r /app/data "$WORK_DIR/omnibox_backup/"
    echo "✅ 已复制 /app/data"
else
    echo "⚠️ 未找到 /app/data 目录,跳过数据复制"
fi

BACKUP_FILE="$WORK_DIR/omnibox_backup.tar.gz"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
REMOTE_PATH="omnibox_backup_${TIMESTAMP}.tar.gz"
tar -czf "$BACKUP_FILE" -C "$WORK_DIR" omnibox_backup

# The heredoc delimiter is quoted so the shell never rewrites the Python
# source; the two paths are handed over through the environment instead.
BACKUP_FILE="$BACKUP_FILE" REMOTE_PATH="$REMOTE_PATH" python3 - <<'PYEOF'
import os

from huggingface_hub import HfApi

backup_file = os.environ["BACKUP_FILE"]
remote_path = os.environ["REMOTE_PATH"]

try:
    HfApi().upload_file(
        path_or_fileobj=backup_file,
        path_in_repo=remote_path,
        repo_id=os.environ.get("HF_DATASET"),
        repo_type="dataset",
        token=os.environ.get("HF_TOKEN"),
    )
    print(f"✅ 备份上传成功: {remote_path}")
except Exception as e:
    # Best effort: report the failure but let the script finish normally.
    print(f"❌ 备份上传失败: {e}")
PYEOF

echo "$(date): 🎉 备份流程完成"
EOF
RUN chmod +x /usr/local/bin/backup.sh
# Startup script.
# 1. If HF_DATASET and HF_TOKEN are set, restores the newest
#    omnibox_backup_*.tar.gz from the dataset into /app/data.
# 2. Starts ./main and a socat forwarder from port 7860 to 127.0.0.1:7023.
# 3. Runs backup.sh every 30 minutes in the background.
# 4. On exit or on SIGTERM/SIGINT, runs one final backup and stops the
#    background processes.
RUN cat > /usr/local/bin/start.sh << 'EOF'
#!/bin/bash
set -e

# Restore the most recent backup, if credentials are configured.
if [ -n "$HF_DATASET" ] && [ -n "$HF_TOKEN" ]; then
    echo "🔄 检查是否存在可恢复的备份..."
    # Quoted delimiter: the Python source needs no shell expansion.
    python3 - <<'PYEOF'
import os
import shutil
import tarfile

from huggingface_hub import HfApi, hf_hub_download

repo_id = os.environ.get("HF_DATASET")
token = os.environ.get("HF_TOKEN")
restore_dir = "/tmp/restore_data"
src_data = os.path.join(restore_dir, "omnibox_backup", "data")
dst_data = "/app/data"

try:
    files = HfApi().list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
    backup_files = [
        f for f in files
        if f.startswith("omnibox_backup_") and f.endswith(".tar.gz")
    ]
    if not backup_files:
        print("📭 未找到历史备份文件")
    else:
        # Names embed a YYYYmmdd_HHMMSS timestamp, so the lexicographic
        # maximum is the newest backup.
        latest = max(backup_files)
        print(f"📥 发现最新备份: {latest},正在下载...")
        path = hf_hub_download(
            repo_id=repo_id,
            filename=latest,
            repo_type="dataset",
            token=token,
        )
        # Start from an empty staging directory so leftovers from an earlier
        # attempt cannot leak into the restored data.
        shutil.rmtree(restore_dir, ignore_errors=True)
        with tarfile.open(path, "r:gz") as tar:
            tar.extractall(path=restore_dir)
        if os.path.exists(src_data):
            if os.path.exists(dst_data):
                shutil.rmtree(dst_data)
            shutil.copytree(src_data, dst_data)
            print("✅ 数据恢复成功")
        else:
            print("⚠️ 备份文件中未找到 /data 目录,跳过恢复")
except Exception as e:
    # A failed restore must not prevent the service from starting.
    print(f"⚠️ 恢复过程出现异常: {e}")
finally:
    # Clean the staging directory on success and on failure alike.
    shutil.rmtree(restore_dir, ignore_errors=True)
PYEOF
fi

# Start the main service and expose it on port 7860.
echo "🚀 启动 Omnibox 主服务..."
./main &
MAIN_PID=$!
socat TCP-LISTEN:7860,fork TCP:127.0.0.1:7023 &
SOCAT_PID=$!

# Periodic backup.
echo "⏰ 启动自动备份(每30分钟)..."
(
    while true; do
        sleep 1800
        # `set -e` is inherited by this subshell; without the `||` a single
        # failed backup would silently end the loop for good.
        /usr/local/bin/backup.sh || echo "⚠️ 备份失败,将在下一个周期重试"
    done
) &
BACKUP_PID=$!

# Final backup and teardown; runs exactly once via the EXIT trap.
cleanup() {
    trap - EXIT
    # Ignore further stop signals so the final backup is not interrupted.
    trap '' TERM INT
    echo "🛑 容器停止,执行最后一次备份..."
    # A failed backup must not skip stopping the background processes.
    /usr/local/bin/backup.sh || echo "⚠️ 最后一次备份失败"
    kill "$MAIN_PID" "$SOCAT_PID" "$BACKUP_PID" 2>/dev/null || true
}
trap cleanup EXIT
# When this script is the container's PID 1 (exec-form CMD), SIGTERM from
# `docker stop` is only acted upon if a handler is installed. Convert the
# signals into a normal exit so the EXIT trap above performs the backup.
trap 'exit 143' TERM
trap 'exit 130' INT

wait "$MAIN_PID"
EOF
RUN chmod +x /usr/local/bin/start.sh
# Port on which start.sh's socat forwarder listens (it relays to
# 127.0.0.1:7023 inside the container).
EXPOSE 7860
# Exec form: start.sh is executed directly as the container's main process.
CMD ["/usr/local/bin/start.sh"]