wwforonce committed on
Commit
d454b01
·
1 Parent(s): 198cb09

add storage

Browse files
Files changed (3) hide show
  1. Dockerfile +33 -9
  2. start_with_sync.sh +64 -0
  3. sync_storage.py +91 -0
Dockerfile CHANGED
@@ -1,10 +1,34 @@
1
- # FROM fossandroid/openwebui:latest
2
  FROM ghcr.io/open-webui/open-webui:main
3
- RUN apt update \
4
- && apt install -y gcc curl sudo
5
- RUN sudo find / \
6
- -path /proc -prune -o \
7
- -path /etc -prune -o \
8
- -path /dev -prune -o \
9
- -path /usr -prune -o \
10
- -exec chmod 777 {} \;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM ghcr.io/open-webui/open-webui:main

# Install OS-level dependencies.
# `apt-get` is used instead of `apt` because the latter's command-line
# interface is meant for interactive use and warns when run from scripts.
# `openssl` is needed by start_with_sync.sh to generate WEBUI_SECRET_KEY.
# The package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
        gcc \
        curl \
        sudo \
        git-lfs \
        openssl \
    && rm -rf /var/lib/apt/lists/*

# Install Python packages (sync_storage.py imports huggingface_hub).
RUN pip install --no-cache-dir \
        huggingface_hub \
        datasets

# Copy the sync helper and the entrypoint wrapper into the image.
COPY sync_storage.py /app/sync_storage.py
COPY start_with_sync.sh /start.sh

# Make the scripts executable and create the default data directory
# (the default value of DATA_DIR in both scripts is /app/data).
RUN chmod +x /app/sync_storage.py /start.sh \
    && mkdir -p /app/data

# Set working directory
WORKDIR /app

# Expose port
EXPOSE 8080

# Start Open WebUI through the wrapper that restores and persists data.
ENTRYPOINT ["/start.sh"]
start_with_sync.sh ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Container entrypoint: restore the data directory from a Hugging Face dataset
# repository, run Open WebUI, upload the data periodically in the background,
# and upload it one last time when the container stops.

set -e

# Configuration (each value can be overridden from the environment).
export DATA_DIR="${DATA_DIR:-/app/data}"
export HF_STORAGE_REPO="${HF_STORAGE_REPO:-your-username/open-webui-storage}"
export SYNC_INTERVAL="${SYNC_INTERVAL:-300}"  # seconds between uploads (default: 5 minutes)

echo "Starting Open WebUI with HF Dataset persistence..."
echo "Data directory: $DATA_DIR"
echo "HF Repository: $HF_STORAGE_REPO"

# Create data directory
mkdir -p "$DATA_DIR"

# Download existing data on startup
echo "Syncing data from Hugging Face..."
python3 /app/sync_storage.py download

# PIDs of the background jobs; empty until the jobs are started so that the
# signal handler is safe to run at any point after it is installed.
SYNC_PID=""
WEBUI_PID=""

# Stop the periodic sync loop and Open WebUI, and wait for both to exit.
stop_children() {
    local pid
    for pid in "$SYNC_PID" "$WEBUI_PID"; do
        if [ -n "$pid" ]; then
            kill "$pid" 2>/dev/null || true
            wait "$pid" 2>/dev/null || true
        fi
    done
}

# Upload the current data directory; best effort, never aborts the script.
final_upload() {
    echo "Uploading final data state..."
    python3 /app/sync_storage.py upload || true
}

# Signal handler for graceful shutdown.
cleanup() {
    echo "Shutting down gracefully..."

    # Stop the writers first so the final archive is not taken while
    # Open WebUI or the periodic sync is still working on the data.
    stop_children
    final_upload

    exit 0
}

# Set up signal handlers
trap cleanup SIGTERM SIGINT

# Upload the data directory every SYNC_INTERVAL seconds, forever.
background_sync() {
    while true; do
        sleep "$SYNC_INTERVAL"
        echo "Periodic sync to Hugging Face..."
        # `|| true` keeps the loop alive under `set -e` if one upload fails.
        python3 /app/sync_storage.py upload || true
    done
}

# Start background sync
background_sync &
SYNC_PID=$!

# Start Open WebUI
echo "Starting Open WebUI..."

# Generate a random secret key unless one was provided by the environment.
export WEBUI_SECRET_KEY="${WEBUI_SECRET_KEY:-$(openssl rand -hex 32)}"

# Run Open WebUI in the background so this shell stays free to handle signals.
/app/backend/start.sh &
WEBUI_PID=$!

# Wait for Open WebUI and capture its status without tripping `set -e`.
webui_status=0
wait "$WEBUI_PID" || webui_status=$?

# Open WebUI exited on its own: stop the sync loop, persist the final state,
# and propagate Open WebUI's exit status.
stop_children
final_upload
exit "$webui_status"
sync_storage.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import shutil
4
+ import json
5
+ from pathlib import Path
6
+ from huggingface_hub import HfApi, Repository, login
7
+ import tarfile
8
+ import tempfile
9
+
10
class HFStorageSync:
    """Persist a local data directory as ``data.tar.gz`` in a Hugging Face dataset repo.

    Both operations are best effort: failures are printed and swallowed so
    that a sync problem never takes down the calling process.
    """

    def __init__(self, repo_id, token=None, data_dir="/app/data"):
        """Create a sync helper.

        Args:
            repo_id: Dataset repository id, e.g. "username/open-webui-storage".
            token: Optional Hugging Face access token. When omitted, the
                client falls back to its own default credential lookup.
            data_dir: Local directory to archive and restore.
        """
        self.repo_id = repo_id
        self.data_dir = Path(data_dir)
        # Pass the token to the client instead of calling login(): this keeps
        # the constructor free of a global login step that runs outside any
        # error handling.
        self.api = HfApi(token=token) if token else HfApi()

    def download_data(self):
        """Download ``data.tar.gz`` from the repo and extract it into ``data_dir``.

        Any failure (missing archive, network error, rejected archive member)
        is printed; ``data_dir`` is created in every case.
        """
        try:
            print("Downloading data from Hugging Face...")

            # Download the data archive
            file_path = self.api.hf_hub_download(
                repo_id=self.repo_id,
                filename="data.tar.gz",
                repo_type="dataset",
            )

            # Extract to data directory
            self.data_dir.mkdir(parents=True, exist_ok=True)
            self._extract_archive(file_path)

            print(f"Data extracted to {self.data_dir}")

        except Exception as e:
            print(f"No existing data found or error downloading: {e}")
            self.data_dir.mkdir(parents=True, exist_ok=True)

    def upload_data(self):
        """Compress ``data_dir`` and upload it to the repo as ``data.tar.gz``.

        Nothing is uploaded when ``data_dir`` does not exist, so an empty
        archive never replaces a previously uploaded one. The temporary
        archive is removed whether or not the upload succeeds.
        """
        if not self.data_dir.exists():
            print(f"Data directory {self.data_dir} does not exist; skipping upload")
            return

        tmp_path = None
        try:
            print("Uploading data to Hugging Face...")

            # Reserve a temporary file name; the archive is written by path.
            with tempfile.NamedTemporaryFile(suffix=".tar.gz", delete=False) as tmp:
                tmp_path = tmp.name

            with tarfile.open(tmp_path, "w:gz") as tar:
                # arcname="." stores paths relative to data_dir.
                tar.add(self.data_dir, arcname=".")

            # Upload to HF
            self.api.upload_file(
                path_or_fileobj=tmp_path,
                path_in_repo="data.tar.gz",
                repo_id=self.repo_id,
                repo_type="dataset",
                commit_message="Update Open WebUI data",
            )

            print("Data uploaded successfully")

        except Exception as e:
            print(f"Error uploading data: {e}")

        finally:
            # Clean up even when archiving or uploading failed.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    pass

    def _extract_archive(self, archive_path):
        """Extract *archive_path* into ``data_dir``, refusing members that escape it."""
        dest = self.data_dir.resolve()
        with tarfile.open(archive_path, "r:gz") as tar:
            if hasattr(tarfile, "data_filter"):
                # Interpreters with extraction filters: let tarfile reject
                # unsafe members (absolute paths, "..", escaping links).
                tar.extractall(dest, filter="data")
                return
            # Older interpreters: validate every member before extracting.
            for member in tar.getmembers():
                self._check_member(dest, member)
            tar.extractall(dest)

    @staticmethod
    def _check_member(dest, member):
        """Raise ``tarfile.TarError`` if *member* would be written outside *dest*."""

        def inside(path):
            resolved = path.resolve()
            return resolved == dest or dest in resolved.parents

        target = dest / member.name
        if not inside(target):
            raise tarfile.TarError(
                f"Refusing to extract {member.name!r} outside {dest}"
            )

        if member.issym():
            # Symlink targets are relative to the link's own directory.
            link_target = target.parent / member.linkname
        elif member.islnk():
            # Hard-link targets are relative to the archive root.
            link_target = dest / member.linkname
        else:
            return

        if not inside(link_target):
            raise tarfile.TarError(
                f"Refusing to extract link {member.name!r} pointing outside {dest}"
            )
+
71
+ def main():
72
+ import sys
73
+
74
+ repo_id = os.getenv("HF_STORAGE_REPO", "your-username/open-webui-storage")
75
+ token = os.getenv("HF_TOKEN")
76
+ data_dir = os.getenv("DATA_DIR", "/app/data")
77
+
78
+ sync = HFStorageSync(repo_id, token, data_dir)
79
+
80
+ if len(sys.argv) > 1:
81
+ if sys.argv[1] == "download":
82
+ sync.download_data()
83
+ elif sys.argv[1] == "upload":
84
+ sync.upload_data()
85
+ else:
86
+ print("Usage: sync_storage.py [download|upload]")
87
+ else:
88
+ print("Usage: sync_storage.py [download|upload]")
89
+
90
+ if __name__ == "__main__":
91
+ main()