wwforonce committed on
Commit
75384c4
·
1 Parent(s): 242a03f
Files changed (4) hide show
  1. Caddyfile +3 -0
  2. Dockerfile +53 -0
  3. start_with_sync.sh +102 -0
  4. sync_storage.py +188 -0
Caddyfile ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
# Listen on the public Space port and forward everything to the
# OpenList server running locally on 5244.
:7860 {
	reverse_proxy 127.0.0.1:5244
}
Dockerfile ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# OS dependencies: gcc for any source builds, jq/curl/wget for release
# fetching below. apt-get (not apt) is the stable CLI for scripts;
# --no-install-recommends keeps the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        curl \
        wget \
        sudo \
        git-lfs \
        openssl \
        jq \
    && rm -rf /var/lib/apt/lists/*

# Python packages used by the sync script.
RUN pip install --no-cache-dir \
        huggingface_hub \
        datasets

# Health check against Caddy, which fronts OpenList on 7860.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl -f http://localhost:7860/ || exit 1

COPY Caddyfile /app/Caddyfile

# Copy sync scripts and the container entrypoint.
COPY sync_storage.py /app/sync_storage.py
COPY start_with_sync.sh /start.sh

# Make scripts executable
RUN chmod +x /app/sync_storage.py /start.sh

# Download the newest OpenList linux-amd64 release.
# -fsS makes curl fail loudly on HTTP errors instead of silently piping an
# error page (or nothing) into jq; URLs are quoted against word splitting.
RUN openlist_url=$(curl -fsS 'https://api.github.com/repos/OpenListTeam/OpenList/releases' -H 'accept: application/json' | jq -r '.[0].assets[] | .browser_download_url | select(. | endswith("linux-amd64.tar.gz"))') && \
    echo "download OpenList from $openlist_url" && \
    wget "$openlist_url" -O /tmp/openlist.tar.gz && \
    mkdir -p /tmp/openlist && \
    tar xf /tmp/openlist.tar.gz -C /tmp/openlist && \
    cp -r /tmp/openlist /app/

# Download the newest Caddy linux_amd64 release the same way.
RUN caddy_url=$(curl -fsS 'https://api.github.com/repos/caddyserver/caddy/releases' -H 'accept: application/json' | jq -r '.[0].assets[] | .browser_download_url | select(. | endswith("linux_amd64.tar.gz"))') && \
    echo "download caddy from $caddy_url" && \
    wget "$caddy_url" -O /tmp/caddy.tar.gz && \
    mkdir -p /tmp/caddy && \
    tar xf /tmp/caddy.tar.gz -C /tmp/caddy && \
    cp -r /tmp/caddy /app/

# Start sync loop + OpenList + Caddy.
ENTRYPOINT ["/start.sh"]
start_with_sync.sh ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Container entrypoint: restore persisted data from a Hugging Face dataset
# repo, start a periodic upload loop, then run OpenList behind Caddy.

set -e

# All writable state lives under /tmp (the only writable area at runtime).
export DATA_DIR="${DATA_DIR:-/tmp/app/data}"
export HF_STORAGE_REPO="${HF_STORAGE_REPO:-nxdev-org/open-webui-storage}"
export SYNC_INTERVAL="${SYNC_INTERVAL:-300}"

# Point every HuggingFace / model cache location at /tmp.
export HF_HOME="/tmp/hf_cache"
export HUGGINGFACE_HUB_CACHE="/tmp/hf_cache"
export TRANSFORMERS_CACHE="/tmp/hf_cache"
export SENTENCE_TRANSFORMERS_HOME="/tmp/hf_cache"

# Override Open WebUI environment variables.
export STATIC_DIR="/tmp/static"
export UPLOAD_DIR="/tmp/uploads"

echo "Starting Open WebUI with HF Dataset persistence..."
echo "Data directory: $DATA_DIR"
echo "HF Repository: $HF_STORAGE_REPO"
echo "HF Cache: $HF_HOME"

# Warn early when sync cannot authenticate.
if [ -z "$HF_TOKEN" ]; then
    echo "Warning: HF_TOKEN not set. Sync functionality will be limited."
else
    echo "HF_TOKEN is set, proceeding with sync..."
fi

# Restore any previously uploaded data before starting services.
echo "Syncing data from Hugging Face..."
python3 /app/sync_storage.py download

# Graceful shutdown: stop every background process we started.
# (The original never captured the OpenList PID, so it was left running.)
cleanup() {
    echo "Shutting down gracefully..."
    kill "$SYNC_PID" 2>/dev/null || true
    kill "$OPENLIST_PID" 2>/dev/null || true
    kill "$TASK_PID" 2>/dev/null || true
    exit 0
}

# Set up signal handlers.
trap cleanup SIGTERM SIGINT

# Periodically push the data directory back to the HF dataset repo.
background_sync() {
    if [ -n "$HF_TOKEN" ]; then
        while true; do
            sleep "$SYNC_INTERVAL"
            echo "Periodic sync to Hugging Face..."
            python3 /app/sync_storage.py upload
        done
    else
        echo "Skipping background sync - no HF_TOKEN"
        while true; do
            sleep 3600
        done
    fi
}

# Start background sync.
background_sync &
SYNC_PID=$!

echo "Starting caddy..."

# Start OpenList in the background from a writable copy under /tmp.
cp -r /app/openlist /tmp/openlist
cd /tmp/openlist && ./openlist start &
OPENLIST_PID=$!

# Caddy's CLI takes the config via --config, not as a positional argument;
# the adapter is auto-detected from the "Caddyfile" filename.
/app/caddy/caddy run --config /app/Caddyfile &
TASK_PID=$!

# Keep the container alive while Caddy runs.
wait "$TASK_PID"
sync_storage.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import shutil
4
+ import json
5
+ from pathlib import Path
6
+ from huggingface_hub import HfApi, create_repo
7
+ import tarfile
8
+ import tempfile
9
+
10
class HFStorageSync:
    """Sync a local data directory with a Hugging Face dataset repository.

    The entire data directory is stored in the repo as one compressed
    archive named ``data.tar.gz``.
    """

    def __init__(self, repo_id, token=None, data_dir="/tmp/open-webui-data"):
        self.repo_id = repo_id
        self.data_dir = Path(data_dir)
        self.token = token
        # Pass the token to HfApi directly so no cached CLI login is needed.
        self.api = HfApi(token=token) if token else HfApi()

    def ensure_repo_exists(self):
        """Create the dataset repository if it doesn't exist.

        Returns:
            bool: True when the repo exists or was just created,
            False when there is no token or creation failed.
        """
        if not self.token:
            print("No token provided, cannot create repository")
            return False

        try:
            # Check if repo exists.
            self.api.repo_info(repo_id=self.repo_id, repo_type="dataset")
            print(f"Repository {self.repo_id} exists")
            return True
        except Exception:
            print(f"Repository {self.repo_id} not found, attempting to create...")

        try:
            create_repo(
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token,
                private=True,  # user data stays private by default
                exist_ok=True,
            )
            print(f"Created repository {self.repo_id}")

            # Seed the repo with a README describing its contents.
            readme_content = """# Open WebUI Storage

This dataset stores persistent data for Open WebUI deployment.

## Contents

- `data.tar.gz`: Compressed archive containing all Open WebUI data including:
  - User configurations
  - Chat histories
  - Uploaded files
  - Database files

This repository is automatically managed by the Open WebUI sync system.
"""

            # Stage the README in a temp file; the finally block guarantees
            # removal even when upload_file raises (the original leaked it).
            tmp_path = None
            try:
                with tempfile.NamedTemporaryFile(mode='w', suffix='.md',
                                                 delete=False) as tmp:
                    tmp.write(readme_content)
                    tmp_path = tmp.name

                self.api.upload_file(
                    path_or_fileobj=tmp_path,
                    path_in_repo="README.md",
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Initial repository setup",
                    token=self.token,
                )
            finally:
                if tmp_path:
                    os.unlink(tmp_path)

            return True
        except Exception as create_error:
            print(f"Failed to create repository: {create_error}")
            return False

    def download_data(self):
        """Download ``data.tar.gz`` from the repo and extract it into data_dir.

        Never raises: all failures are reported on stdout, since a missing
        archive is normal on the first run.
        """
        try:
            print("Downloading data from Hugging Face...")

            # Ensure the data directory exists and is writable.
            self.data_dir.mkdir(parents=True, exist_ok=True)
            test_file = self.data_dir / "test_write"
            try:
                test_file.touch()
                test_file.unlink()
                print(f"Data directory {self.data_dir} is writable")
            except Exception as e:
                print(f"Warning: Data directory may not be writable: {e}")
                return

            if not self.token:
                print("No HF_TOKEN provided, skipping download")
                return

            if not self.ensure_repo_exists():
                print("Could not access or create repository")
                return

            # Fetch and unpack the archive if one has been uploaded before.
            try:
                file_path = self.api.hf_hub_download(
                    repo_id=self.repo_id,
                    filename="data.tar.gz",
                    repo_type="dataset",
                    token=self.token,
                )

                with tarfile.open(file_path, 'r:gz') as tar:
                    try:
                        # "data" filter blocks path traversal / symlink
                        # escapes during extraction (PEP 706).
                        tar.extractall(self.data_dir, filter="data")
                    except TypeError:
                        # Python < 3.11.4: filter argument not available.
                        tar.extractall(self.data_dir)

                print(f"Data extracted to {self.data_dir}")
            except Exception as e:
                print(f"No existing data found (this is normal for first run): {e}")

        except Exception as e:
            print(f"Error during download: {e}")

    def upload_data(self):
        """Compress data_dir and upload it to the repo as ``data.tar.gz``.

        Never raises: failures are reported on stdout so callers (a cron-like
        shell loop) keep running.
        """
        try:
            if not self.token:
                print("No HF_TOKEN provided, skipping upload")
                return

            print("Uploading data to Hugging Face...")

            if not self.data_dir.exists() or not any(self.data_dir.iterdir()):
                print("No data to upload")
                return

            if not self.ensure_repo_exists():
                print("Could not access or create repository")
                return

            # Build the archive in a temp file; the finally block guarantees
            # removal even when upload_file raises (the original leaked it).
            tmp_path = None
            try:
                with tempfile.NamedTemporaryFile(suffix='.tar.gz',
                                                 delete=False) as tmp:
                    tmp_path = tmp.name

                with tarfile.open(tmp_path, 'w:gz') as tar:
                    for item in self.data_dir.iterdir():
                        # Skip scratch files created by the write test.
                        if item.name not in ("test_write", ".gitkeep"):
                            tar.add(item, arcname=item.name)

                self.api.upload_file(
                    path_or_fileobj=tmp_path,
                    path_in_repo="data.tar.gz",
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Update Open WebUI data",
                    token=self.token,
                )
            finally:
                if tmp_path:
                    os.unlink(tmp_path)

            print("Data uploaded successfully")

        except Exception as e:
            print(f"Error uploading data: {e}")
167
+
168
def main():
    """CLI entry point: ``sync_storage.py [download|upload]``.

    Repo id, token, and data directory are read from the environment
    (HF_STORAGE_REPO, HF_TOKEN, DATA_DIR).
    """
    import sys

    repo_id = os.getenv("HF_STORAGE_REPO", "nxdev-org/open-webui-storage")
    token = os.getenv("HF_TOKEN")
    data_dir = os.getenv("DATA_DIR", "/tmp/open-webui-data")

    sync = HFStorageSync(repo_id, token, data_dir)

    commands = {"download": sync.download_data, "upload": sync.upload_data}
    action = commands.get(sys.argv[1]) if len(sys.argv) > 1 else None
    if action is None:
        # Exit non-zero so scripted callers notice misuse; the original
        # printed usage but still exited 0.
        print("Usage: sync_storage.py [download|upload]", file=sys.stderr)
        sys.exit(2)
    action()


if __name__ == "__main__":
    main()