deeme committed on
Commit
194a6dc
·
verified ·
1 Parent(s): 9b35d58

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -6
  2. sync_data.sh +93 -96
Dockerfile CHANGED
@@ -21,7 +21,6 @@ ENV TZ=Asia/Shanghai
21
  #ENV GID=1000
22
  ENV ENABLE_AUTH=True
23
  ENV WEBUI_AUTH=True
24
- ENV ENABLE_SIGNUP=false
25
 
26
  ENV DEFAULT_MODELS=gpt-4o-mini
27
  ENV RAG_EMBEDDING_ENGINE=openai
@@ -42,15 +41,12 @@ ENV ENABLE_REALTIME_CHAT_SAVE=false
42
 
43
  # 数据同步相关
44
  ENV SYNC_INTERVAL=600
45
- ENV DATASET_ID=deeme/ui
46
-
47
- RUN apt-get update && apt-get install -y python3 python3-pip
48
- RUN pip3 install --no-cache-dir huggingface_hub
49
 
50
  COPY sync_data.sh sync_data.sh
51
 
52
  RUN chmod -R 777 ./data && \
 
53
  chmod -R 777 /app/backend/open_webui/static && \
54
  chmod +x sync_data.sh && \
55
  sed -i "1r sync_data.sh" ./start.sh && \
56
- sed -i "1r redis-start.sh" ./start.sh
 
21
  #ENV GID=1000
22
  ENV ENABLE_AUTH=True
23
  ENV WEBUI_AUTH=True
 
24
 
25
  ENV DEFAULT_MODELS=gpt-4o-mini
26
  ENV RAG_EMBEDDING_ENGINE=openai
 
41
 
42
  # 数据同步相关
43
  ENV SYNC_INTERVAL=600
 
 
 
 
44
 
45
  COPY sync_data.sh sync_data.sh
46
 
47
  RUN chmod -R 777 ./data && \
48
+ chmod -R 777 ./open_webui && \
49
  chmod -R 777 /app/backend/open_webui/static && \
50
  chmod +x sync_data.sh && \
51
  sed -i "1r sync_data.sh" ./start.sh && \
52
+ sed -i "1r redis-start.sh" ./start.sh
sync_data.sh CHANGED
@@ -1,113 +1,110 @@
1
- #!/bin/sh
2
-
3
- # 检查环境变量
4
- if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
5
- echo "缺少必要的环境变量 HF_TOKEN 或 DATASET_ID"
6
  exit 1
7
  fi
8
 
9
- # 上传备份
10
- cat > /tmp/hf_sync.py << 'EOL'
11
- from huggingface_hub import HfApi
12
- import sys
13
- import os
14
 
15
- def manage_backups(api, repo_id, max_files=50):
16
- files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
17
- backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
18
- backup_files.sort()
19
-
20
- if len(backup_files) >= max_files:
21
- files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
22
- for file_to_delete in files_to_delete:
23
- try:
24
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type="dataset")
25
- print(f'已删除旧备份: {file_to_delete}')
26
- except Exception as e:
27
- print(f'删除 {file_to_delete} 时出错: {str(e)}')
28
 
29
- def upload_backup(file_path, file_name, token, repo_id):
30
- api = HfApi(token=token)
31
- try:
32
- api.upload_file(
33
- path_or_fileobj=file_path,
34
- path_in_repo=file_name,
35
- repo_id=repo_id,
36
- repo_type="dataset"
37
- )
38
- print(f"成功上传 {file_name}")
39
-
40
- manage_backups(api, repo_id)
41
- except Exception as e:
42
- print(f"文件上传出错: {str(e)}")
43
 
44
- # 下载最新备份
45
- def download_latest_backup(token, repo_id):
46
- try:
47
- api = HfApi(token=token)
48
- files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
49
- backup_files = [f for f in files if f.startswith('webui_backup_') and f.endswith('.db')]
50
-
51
- if not backup_files:
52
- print("未找到备份文件")
53
- return
54
-
55
- latest_backup = sorted(backup_files)[-1]
 
 
 
 
56
 
57
- filepath = api.hf_hub_download(
58
- repo_id=repo_id,
59
- filename=latest_backup,
60
- repo_type="dataset"
61
- )
62
 
63
- if filepath and os.path.exists(filepath):
64
- os.makedirs('./data', exist_ok=True)
65
- os.system(f'cp "{filepath}" ./data/webui.db')
66
- print(f"成功从 {latest_backup} 恢复备份")
67
-
68
- except Exception as e:
69
- print(f"下载备份时出错: {str(e)}")
70
 
71
- if __name__ == "__main__":
72
- action = sys.argv[1]
73
- token = sys.argv[2]
74
- repo_id = sys.argv[3]
75
-
76
- if action == "upload":
77
- file_path = sys.argv[4]
78
- file_name = sys.argv[5]
79
- upload_backup(file_path, file_name, token, repo_id)
80
- elif action == "download":
81
- download_latest_backup(token, repo_id)
82
- EOL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- # 首次启动时下载最新备份
85
- echo "正在从 HuggingFace 下载最新备份..."
86
- python3 /tmp/hf_sync.py download "${HF_TOKEN}" "${DATASET_ID}"
87
 
88
- # 同步函数
89
- sync_data() {
90
- while true; do
91
- echo "开始同步进程 $(date)"
92
-
93
- if [ -f "./data/webui.db" ]; then
94
- timestamp=$(date +%Y%m%d_%H%M%S)
95
- backup_file="webui_backup_${timestamp}.db"
96
-
97
- # 复制数据库文件
98
- cp ./data/webui.db "/tmp/${backup_file}"
99
-
100
- echo "正在上传备份到 HuggingFace..."
101
- python3 /tmp/hf_sync.py upload "${HF_TOKEN}" "${DATASET_ID}" "/tmp/${backup_file}" "${backup_file}"
102
-
103
- rm -f "/tmp/${backup_file}"
104
  else
105
- echo "数据库文件不存在,等待下次同步..."
 
 
 
106
  fi
107
-
108
- SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
109
- echo "下次同步将在 ${SYNC_INTERVAL} 秒后进行..."
 
110
  sleep $SYNC_INTERVAL
 
111
  done
112
  }
113
 
 
1
+ # 检查必要的环境变量
2
+ if [ -z "$G_NAME" ] || [ -z "$G_TOKEN" ]; then
3
+ echo "缺少必要的环境变量 G_NAME 或 G_TOKEN"
 
 
4
  exit 1
5
  fi
6
 
7
+ # 解析仓库名和用户名
8
+ IFS='/' read -r GITHUB_USER GITHUB_REPO <<< "$G_NAME"
 
 
 
9
 
10
+ # 构建 GitHub 仓库的克隆 URL,包含令牌
11
+ REPO_URL="https://${G_TOKEN}@github.com/${G_NAME}.git"
12
+ mkdir -p ./data/github_data
13
+ # 克隆仓库
14
+ echo "正在克隆仓库……"
15
+ git clone "$REPO_URL" ./data/github_data || {
16
+ echo "克隆失败,请检查 G_NAME G_TOKEN 是否正确。"
17
+ exit 1
18
+ }
 
 
 
 
19
 
20
+ if [ -f ./data/github_data/webui.db ]; then
21
+ cp ./data/github_data/webui.db ./data/webui.db
22
+ echo "从 GitHub 仓库中拉取成功"
23
+ else
24
+ echo "GitHub 仓库中未找到 webui.db,将在同步时推送"
25
+ fi
 
 
 
 
 
 
 
 
26
 
27
# Background sync loop: every $SYNC_INTERVAL seconds, commits and pushes
# ./data/webui.db to the GitHub backup repo and (optionally) uploads it
# to a WebDAV server.
# Globals read: SYNC_INTERVAL (default 7200 s),
#               WEBDAV_URL, WEBDAV_USERNAME, WEBDAV_PASSWORD (optional).
# Never returns; intended to be launched in the background.
sync_data() {
    while true; do
        echo "正在开始同步"

        # --- 1. Sync to GitHub -----------------------------------------
        # Run the git work in a subshell so the loop's working directory
        # is never changed. The previous version did `cd ./data/github_data`
        # unchecked and then `cd ..; cd ..`: if the first cd failed, every
        # later iteration walked further up the filesystem.
        (
            cd ./data/github_data || {
                echo "无法进入仓库目录,跳过本次 GitHub 同步"
                exit 1
            }
            # Identity for the automated commits.
            git config user.name "AutoSync Bot"
            git config user.email "autosync@bot.com"

            # Make sure we are on the default branch (main, else master).
            git checkout main || git checkout master

            # Stage the latest database copy, if it exists yet.
            if [ -f "../webui.db" ]; then
                cp ../webui.db ./webui.db
            else
                echo "数据库尚未初始化"
            fi

            # Commit and push only when the file actually changed.
            if [ -n "$(git status -s)" ]; then
                git add webui.db
                git commit -m "Auto sync webui.db $(date '+%Y-%m-%d %H:%M:%S')"
                if git push origin HEAD; then
                    echo "GitHub推送成功"
                else
                    echo "推送失败,等待重试..."
                    sleep 10
                    git push origin HEAD || echo "重试失败,放弃推送到Github。"
                fi
            else
                echo "GitHub: 没有检测到数据库变化"
            fi
        )

        # --- 2. Sync to WebDAV -----------------------------------------
        # Runs every cycle. (Previously this step was nested inside the
        # git "changes detected" branch — despite being numbered as a
        # sibling step — so it was silently skipped whenever the db had
        # not changed since the last git commit.)
        if [ -z "$WEBDAV_URL" ] || [ -z "$WEBDAV_USERNAME" ] || [ -z "$WEBDAV_PASSWORD" ]; then
            echo "WebDAV 环境变量缺失,跳过 WebDAV 同步。"
        else
            echo "同步到 WebDAV..."
            # One file per calendar day, e.g. webui_06_15.db.
            FILENAME="webui_$(date +'%m_%d').db"
            if [ -f ./data/webui.db ]; then
                if curl -T ./data/webui.db --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$WEBDAV_URL/$FILENAME"; then
                    echo "WebDAV 上传成功"
                else
                    echo "WebDAV 上传失败,等待重试..."
                    sleep 10
                    curl -T ./data/webui.db --user "$WEBDAV_USERNAME:$WEBDAV_PASSWORD" "$WEBDAV_URL/$FILENAME" || \
                        echo "重试失败,放弃webdav上传。"
                fi
            else
                echo "未找到 webui.db 文件,跳过 WebDAV 同步"
            fi
        fi

        # --- 3. Wait for the next cycle --------------------------------
        SYNC_INTERVAL=${SYNC_INTERVAL:-7200}  # 默认间隔时间为 7200 秒
        echo "当前时间 $(date '+%Y-%m-%d %H:%M:%S')"
        echo "等待 ${SYNC_INTERVAL} 秒后进行下一次同步..."
        sleep "$SYNC_INTERVAL"
    done
}
110