Spaces:
Paused
Paused
add storage
Browse files- Dockerfile +33 -9
- start_with_sync.sh +64 -0
- sync_storage.py +91 -0
Dockerfile
CHANGED
|
@@ -1,10 +1,34 @@
|
|
| 1 |
-
# FROM fossandroid/openwebui:latest
|
| 2 |
FROM ghcr.io/open-webui/open-webui:main
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
RUN
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Open WebUI image extended with a Hugging Face dataset sync layer:
# /start.sh restores the data directory, launches Open WebUI and keeps
# uploading the data directory through /app/sync_storage.py.
FROM ghcr.io/open-webui/open-webui:main

# Install dependencies.
# apt-get is used instead of apt: apt's command-line interface is not stable
# and it prints a warning when driven from a script such as a Docker build.
# Recommended packages are left enabled on purpose so the installed package
# set stays the same as before.
RUN apt-get update && apt-get install -y \
        gcc \
        curl \
        sudo \
        git-lfs \
        openssl \
    && rm -rf /var/lib/apt/lists/*

# Install Python packages (huggingface_hub is imported by sync_storage.py)
RUN pip install --no-cache-dir \
        huggingface_hub \
        datasets

# Copy sync scripts
COPY sync_storage.py /app/sync_storage.py
COPY start_with_sync.sh /start.sh

# Make scripts executable
RUN chmod +x /app/sync_storage.py /start.sh

# Create data directory (default DATA_DIR of the sync scripts)
RUN mkdir -p /app/data

# Set working directory
WORKDIR /app

# Expose port
EXPOSE 8080

# Start with sync
ENTRYPOINT ["/start.sh"]
|
start_with_sync.sh
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
#
# Container entrypoint.
#   1. Restores DATA_DIR from the Hugging Face dataset repo.
#   2. Starts Open WebUI and a loop that uploads DATA_DIR every
#      SYNC_INTERVAL seconds.
#   3. On SIGTERM/SIGINT, or when Open WebUI exits by itself, stops the
#      writers first and then uploads the final state of DATA_DIR.

set -e

# Configuration (every value can be overridden from the environment)
export DATA_DIR="${DATA_DIR:-/app/data}"
export HF_STORAGE_REPO="${HF_STORAGE_REPO:-your-username/open-webui-storage}"
export SYNC_INTERVAL="${SYNC_INTERVAL:-300}"  # 5 minutes
# Seconds to wait for Open WebUI to exit before taking the final snapshot
SHUTDOWN_TIMEOUT="${SHUTDOWN_TIMEOUT:-10}"

# Initialised up front so the shutdown path is safe if a signal arrives
# before the background processes have been started.
SYNC_PID=""
WEBUI_PID=""

echo "Starting Open WebUI with HF Dataset persistence..."
echo "Data directory: $DATA_DIR"
echo "HF Repository: $HF_STORAGE_REPO"

# Create data directory
mkdir -p "$DATA_DIR"

# Download existing data on startup
echo "Syncing data from Hugging Face..."
python3 /app/sync_storage.py download

# Stop the periodic uploader so it cannot overlap the final upload
stop_background_sync() {
    if [ -n "$SYNC_PID" ]; then
        kill "$SYNC_PID" 2>/dev/null || true
    fi
}

# Ask Open WebUI to terminate and wait, at most SHUTDOWN_TIMEOUT seconds,
# until the process is gone so the data directory is no longer being written.
stop_webui() {
    [ -n "$WEBUI_PID" ] || return 0
    kill "$WEBUI_PID" 2>/dev/null || return 0

    local waited=0
    while kill -0 "$WEBUI_PID" 2>/dev/null && [ "$waited" -lt "$SHUTDOWN_TIMEOUT" ]; do
        sleep 1
        waited=$((waited + 1))
    done
}

# Upload the final data state; a failure is reported but must not abort the
# shutdown sequence (set -e is active).
final_upload() {
    echo "Uploading final data state..."
    python3 /app/sync_storage.py upload || echo "Final upload failed"
}

# Function to handle graceful shutdown
cleanup() {
    # Ignore repeated signals so the final upload is not started twice
    trap '' SIGTERM SIGINT

    echo "Shutting down gracefully..."

    # Stop every writer before the snapshot is taken
    stop_background_sync
    stop_webui

    final_upload

    exit 0
}

# Set up signal handlers
trap cleanup SIGTERM SIGINT

# Background sync function
background_sync() {
    while true; do
        sleep "$SYNC_INTERVAL"
        echo "Periodic sync to Hugging Face..."
        # With set -e a failing upload would silently end this loop
        python3 /app/sync_storage.py upload || echo "Periodic sync failed"
    done
}

# Start background sync
background_sync &
SYNC_PID=$!

# Start Open WebUI
echo "Starting Open WebUI..."

# Set environment variables for Open WebUI (DATA_DIR is already exported above)
export WEBUI_SECRET_KEY="${WEBUI_SECRET_KEY:-$(openssl rand -hex 32)}"

# Start Open WebUI in background
/app/backend/start.sh &
WEBUI_PID=$!

# Wait for Open WebUI process. The status is captured with `||` so that
# set -e does not end the script before the final upload when Open WebUI
# exits with a non-zero status.
webui_status=0
wait "$WEBUI_PID" || webui_status=$?

# Reached only when Open WebUI exited on its own; a trapped signal is
# handled by cleanup(), which exits the script itself.
stop_background_sync
final_upload
exit "$webui_status"
|
sync_storage.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import json
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from huggingface_hub import HfApi, Repository, login
|
| 7 |
+
import tarfile
|
| 8 |
+
import tempfile
|
| 9 |
+
|
| 10 |
+
class HFStorageSync:
    """Mirror a local data directory to a Hugging Face dataset repository.

    The directory is stored in the repository as one gzip-compressed tar
    archive named ``data.tar.gz``.
    """

    def __init__(self, repo_id, token=None, data_dir="/app/data"):
        """Set up the sync helper.

        Args:
            repo_id: Dataset repository id, e.g. "username/open-webui-storage".
            token: Optional Hugging Face token; when given it is passed to
                ``login``.
            data_dir: Local directory that is archived and restored.
        """
        self.repo_id = repo_id  # e.g., "username/open-webui-storage"
        self.data_dir = Path(data_dir)
        self.api = HfApi()

        if token:
            login(token=token)

    def download_data(self) -> None:
        """Download ``data.tar.gz`` and extract it into the data directory.

        Best effort: any failure (missing repository or file, network error,
        bad archive) is printed and the data directory is still created, so
        the caller can start with an empty directory.
        """
        try:
            print("Downloading data from Hugging Face...")

            # Download the data archive
            file_path = self.api.hf_hub_download(
                repo_id=self.repo_id,
                filename="data.tar.gz",
                repo_type="dataset",
            )

            # Extract to data directory
            self.data_dir.mkdir(parents=True, exist_ok=True)

            with tarfile.open(file_path, "r:gz") as tar:
                if hasattr(tarfile, "data_filter"):
                    # The "data" filter rejects members that would be written
                    # outside the destination (absolute paths, "..", links).
                    tar.extractall(self.data_dir, filter="data")
                else:
                    # Older Python without extraction filters
                    tar.extractall(self.data_dir)

            print(f"Data extracted to {self.data_dir}")

        except Exception as e:
            print(f"No existing data found or error downloading: {e}")
            self.data_dir.mkdir(parents=True, exist_ok=True)

    def upload_data(self) -> None:
        """Archive the data directory and upload it as ``data.tar.gz``.

        Best effort: failures are printed, never raised. The temporary
        archive is removed whether or not the upload succeeds.
        """
        try:
            print("Uploading data to Hugging Face...")

            # Create temporary archive; only the path is needed, so the
            # descriptor returned by mkstemp is closed right away.
            fd, archive_path = tempfile.mkstemp(suffix=".tar.gz")
            os.close(fd)

            try:
                with tarfile.open(archive_path, "w:gz") as tar:
                    if self.data_dir.exists():
                        tar.add(self.data_dir, arcname=".")

                # Upload to HF
                self.api.upload_file(
                    path_or_fileobj=archive_path,
                    path_in_repo="data.tar.gz",
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    commit_message="Update Open WebUI data",
                )
            finally:
                # Clean up even when archiving or uploading failed, so
                # repeated failures do not fill the temp directory.
                if os.path.exists(archive_path):
                    os.unlink(archive_path)

            print("Data uploaded successfully")

        except Exception as e:
            print(f"Error uploading data: {e}")
|
| 70 |
+
|
| 71 |
+
def main():
    """Command-line entry point: ``sync_storage.py [download|upload]``.

    Reads ``HF_STORAGE_REPO``, ``HF_TOKEN`` and ``DATA_DIR`` from the
    environment and runs the requested sync direction.

    Raises:
        SystemExit: with status 2 when the command is missing or unknown.
    """
    import sys

    repo_id = os.getenv("HF_STORAGE_REPO", "your-username/open-webui-storage")
    token = os.getenv("HF_TOKEN")
    data_dir = os.getenv("DATA_DIR", "/app/data")

    # Validate the command before building the sync object, so a usage error
    # neither triggers a login nor exits with a success status.
    command = sys.argv[1] if len(sys.argv) > 1 else None
    if command not in ("download", "upload"):
        print("Usage: sync_storage.py [download|upload]", file=sys.stderr)
        sys.exit(2)

    sync = HFStorageSync(repo_id, token, data_dir)

    if command == "download":
        sync.download_data()
    else:
        sync.upload_data()


if __name__ == "__main__":
    main()
|