Spaces:
Paused
Paused
fix storage permission error
Browse files
- Dockerfile +8 -3
- start_with_sync.sh +31 -10
- sync_storage.py +47 -22
Dockerfile
CHANGED
|
@@ -14,6 +14,14 @@ RUN pip install --no-cache-dir \
|
|
| 14 |
huggingface_hub \
|
| 15 |
datasets
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Copy sync scripts
|
| 18 |
COPY sync_storage.py /app/sync_storage.py
|
| 19 |
COPY start_with_sync.sh /start.sh
|
|
@@ -21,9 +29,6 @@ COPY start_with_sync.sh /start.sh
|
|
| 21 |
# Make scripts executable
|
| 22 |
RUN chmod +x /app/sync_storage.py /start.sh
|
| 23 |
|
| 24 |
-
# Create data directory
|
| 25 |
-
RUN mkdir -p /app/data
|
| 26 |
-
|
| 27 |
# Set working directory
|
| 28 |
WORKDIR /app
|
| 29 |
|
|
|
|
| 14 |
huggingface_hub \
|
| 15 |
datasets
|
| 16 |
|
| 17 |
+
# Create necessary directories with proper permissions
|
| 18 |
+
RUN mkdir -p /app/data /app/hf_cache /tmp/hf_cache && \
|
| 19 |
+
chmod -R 777 /app/data /app/hf_cache /tmp/hf_cache
|
| 20 |
+
|
| 21 |
+
# Set HuggingFace cache directory to writable location
|
| 22 |
+
ENV HF_HOME=/tmp/hf_cache
|
| 23 |
+
ENV HUGGINGFACE_HUB_CACHE=/tmp/hf_cache
|
| 24 |
+
|
| 25 |
# Copy sync scripts
|
| 26 |
COPY sync_storage.py /app/sync_storage.py
|
| 27 |
COPY start_with_sync.sh /start.sh
|
|
|
|
| 29 |
# Make scripts executable
|
| 30 |
RUN chmod +x /app/sync_storage.py /start.sh
|
| 31 |
|
|
|
|
|
|
|
|
|
|
| 32 |
# Set working directory
|
| 33 |
WORKDIR /app
|
| 34 |
|
start_with_sync.sh
CHANGED
|
@@ -7,12 +7,25 @@ export DATA_DIR="${DATA_DIR:-/app/data}"
|
|
| 7 |
export HF_STORAGE_REPO="${HF_STORAGE_REPO:-your-username/open-webui-storage}"
|
| 8 |
export SYNC_INTERVAL="${SYNC_INTERVAL:-300}" # 5 minutes
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
echo "Starting Open WebUI with HF Dataset persistence..."
|
| 11 |
echo "Data directory: $DATA_DIR"
|
| 12 |
echo "HF Repository: $HF_STORAGE_REPO"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Download existing data on startup
|
| 18 |
echo "Syncing data from Hugging Face..."
|
|
@@ -23,8 +36,10 @@ cleanup() {
|
|
| 23 |
echo "Shutting down gracefully..."
|
| 24 |
|
| 25 |
# Upload final data state
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
# Kill background processes
|
| 30 |
kill $SYNC_PID 2>/dev/null || true
|
|
@@ -38,11 +53,18 @@ trap cleanup SIGTERM SIGINT
|
|
| 38 |
|
| 39 |
# Background sync function
|
| 40 |
background_sync() {
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
# Start background sync
|
|
@@ -54,7 +76,6 @@ echo "Starting Open WebUI..."
|
|
| 54 |
|
| 55 |
# Set environment variables for Open WebUI
|
| 56 |
export WEBUI_SECRET_KEY="${WEBUI_SECRET_KEY:-$(openssl rand -hex 32)}"
|
| 57 |
-
export DATA_DIR="$DATA_DIR"
|
| 58 |
|
| 59 |
# Start Open WebUI in background
|
| 60 |
/app/backend/start.sh &
|
|
|
|
| 7 |
export HF_STORAGE_REPO="${HF_STORAGE_REPO:-your-username/open-webui-storage}"
|
| 8 |
export SYNC_INTERVAL="${SYNC_INTERVAL:-300}" # 5 minutes
|
| 9 |
|
| 10 |
+
# Set HuggingFace cache to writable location
|
| 11 |
+
export HF_HOME="/tmp/hf_cache"
|
| 12 |
+
export HUGGINGFACE_HUB_CACHE="/tmp/hf_cache"
|
| 13 |
+
|
| 14 |
echo "Starting Open WebUI with HF Dataset persistence..."
|
| 15 |
echo "Data directory: $DATA_DIR"
|
| 16 |
echo "HF Repository: $HF_STORAGE_REPO"
|
| 17 |
+
echo "HF Cache: $HF_HOME"
|
| 18 |
+
|
| 19 |
+
# Create directories with proper permissions
|
| 20 |
+
mkdir -p "$DATA_DIR" "$HF_HOME"
|
| 21 |
+
chmod -R 777 "$DATA_DIR" "$HF_HOME"
|
| 22 |
|
| 23 |
+
# Check if HF_TOKEN is set
|
| 24 |
+
if [ -z "$HF_TOKEN" ]; then
|
| 25 |
+
echo "Warning: HF_TOKEN not set. Sync functionality will be limited."
|
| 26 |
+
else
|
| 27 |
+
echo "HF_TOKEN is set, proceeding with sync..."
|
| 28 |
+
fi
|
| 29 |
|
| 30 |
# Download existing data on startup
|
| 31 |
echo "Syncing data from Hugging Face..."
|
|
|
|
| 36 |
echo "Shutting down gracefully..."
|
| 37 |
|
| 38 |
# Upload final data state
|
| 39 |
+
if [ -n "$HF_TOKEN" ]; then
|
| 40 |
+
echo "Uploading final data state..."
|
| 41 |
+
python3 /app/sync_storage.py upload
|
| 42 |
+
fi
|
| 43 |
|
| 44 |
# Kill background processes
|
| 45 |
kill $SYNC_PID 2>/dev/null || true
|
|
|
|
| 53 |
|
| 54 |
# Background sync function
|
| 55 |
background_sync() {
|
| 56 |
+
if [ -n "$HF_TOKEN" ]; then
|
| 57 |
+
while true; do
|
| 58 |
+
sleep $SYNC_INTERVAL
|
| 59 |
+
echo "Periodic sync to Hugging Face..."
|
| 60 |
+
python3 /app/sync_storage.py upload
|
| 61 |
+
done
|
| 62 |
+
else
|
| 63 |
+
echo "Skipping background sync - no HF_TOKEN"
|
| 64 |
+
while true; do
|
| 65 |
+
sleep 3600 # Just sleep if no token
|
| 66 |
+
done
|
| 67 |
+
fi
|
| 68 |
}
|
| 69 |
|
| 70 |
# Start background sync
|
|
|
|
| 76 |
|
| 77 |
# Set environment variables for Open WebUI
|
| 78 |
export WEBUI_SECRET_KEY="${WEBUI_SECRET_KEY:-$(openssl rand -hex 32)}"
|
|
|
|
| 79 |
|
| 80 |
# Start Open WebUI in background
|
| 81 |
/app/backend/start.sh &
|
sync_storage.py
CHANGED
|
@@ -3,41 +3,58 @@ import os
|
|
| 3 |
import shutil
|
| 4 |
import json
|
| 5 |
from pathlib import Path
|
| 6 |
-
from huggingface_hub import HfApi
|
| 7 |
import tarfile
|
| 8 |
import tempfile
|
| 9 |
|
| 10 |
class HFStorageSync:
|
| 11 |
def __init__(self, repo_id, token=None, data_dir="/app/data"):
|
| 12 |
-
self.repo_id = repo_id
|
| 13 |
self.data_dir = Path(data_dir)
|
| 14 |
-
self.
|
| 15 |
|
| 16 |
-
|
| 17 |
-
|
| 18 |
|
| 19 |
def download_data(self):
|
| 20 |
"""Download and extract data from HF dataset repo"""
|
| 21 |
try:
|
| 22 |
print("Downloading data from Hugging Face...")
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
repo_id=self.repo_id,
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
#
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
except Exception as e:
|
| 40 |
-
print(f"
|
| 41 |
self.data_dir.mkdir(parents=True, exist_ok=True)
|
| 42 |
|
| 43 |
def upload_data(self):
|
|
@@ -45,11 +62,15 @@ class HFStorageSync:
|
|
| 45 |
try:
|
| 46 |
print("Uploading data to Hugging Face...")
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
# Create temporary archive
|
| 49 |
with tempfile.NamedTemporaryFile(suffix='.tar.gz', delete=False) as tmp:
|
| 50 |
with tarfile.open(tmp.name, 'w:gz') as tar:
|
| 51 |
-
|
| 52 |
-
tar.add(
|
| 53 |
|
| 54 |
# Upload to HF
|
| 55 |
self.api.upload_file(
|
|
@@ -57,7 +78,8 @@ class HFStorageSync:
|
|
| 57 |
path_in_repo="data.tar.gz",
|
| 58 |
repo_id=self.repo_id,
|
| 59 |
repo_type="dataset",
|
| 60 |
-
commit_message="Update Open WebUI data"
|
|
|
|
| 61 |
)
|
| 62 |
|
| 63 |
# Clean up
|
|
@@ -75,6 +97,9 @@ def main():
|
|
| 75 |
token = os.getenv("HF_TOKEN")
|
| 76 |
data_dir = os.getenv("DATA_DIR", "/app/data")
|
| 77 |
|
|
|
|
|
|
|
|
|
|
| 78 |
sync = HFStorageSync(repo_id, token, data_dir)
|
| 79 |
|
| 80 |
if len(sys.argv) > 1:
|
|
|
|
| 3 |
import shutil
|
| 4 |
import json
|
| 5 |
from pathlib import Path
|
| 6 |
+
from huggingface_hub import HfApi
|
| 7 |
import tarfile
|
| 8 |
import tempfile
|
| 9 |
|
| 10 |
class HFStorageSync:
|
| 11 |
def __init__(self, repo_id, token=None, data_dir="/app/data"):
|
| 12 |
+
self.repo_id = repo_id
|
| 13 |
self.data_dir = Path(data_dir)
|
| 14 |
+
self.token = token
|
| 15 |
|
| 16 |
+
# Initialize API with token directly (avoid login() which saves to disk)
|
| 17 |
+
self.api = HfApi(token=token)
|
| 18 |
|
| 19 |
def download_data(self):
|
| 20 |
"""Download and extract data from HF dataset repo"""
|
| 21 |
try:
|
| 22 |
print("Downloading data from Hugging Face...")
|
| 23 |
|
| 24 |
+
# Check if repo exists first
|
| 25 |
+
try:
|
| 26 |
+
repo_info = self.api.repo_info(repo_id=self.repo_id, repo_type="dataset")
|
| 27 |
+
print(f"Found repository: {self.repo_id}")
|
| 28 |
+
except Exception as e:
|
| 29 |
+
print(f"Repository {self.repo_id} not found or not accessible: {e}")
|
| 30 |
+
print("Creating empty data directory...")
|
| 31 |
+
self.data_dir.mkdir(parents=True, exist_ok=True)
|
| 32 |
+
return
|
| 33 |
|
| 34 |
+
# Try to download the data archive
|
| 35 |
+
try:
|
| 36 |
+
file_path = self.api.hf_hub_download(
|
| 37 |
+
repo_id=self.repo_id,
|
| 38 |
+
filename="data.tar.gz",
|
| 39 |
+
repo_type="dataset",
|
| 40 |
+
token=self.token
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
# Extract to data directory
|
| 44 |
+
self.data_dir.mkdir(parents=True, exist_ok=True)
|
| 45 |
+
|
| 46 |
+
with tarfile.open(file_path, 'r:gz') as tar:
|
| 47 |
+
tar.extractall(self.data_dir)
|
| 48 |
+
|
| 49 |
+
print(f"Data extracted to {self.data_dir}")
|
| 50 |
+
|
| 51 |
+
except Exception as e:
|
| 52 |
+
print(f"No data.tar.gz found in repository: {e}")
|
| 53 |
+
print("Starting with empty data directory...")
|
| 54 |
+
self.data_dir.mkdir(parents=True, exist_ok=True)
|
| 55 |
|
| 56 |
except Exception as e:
|
| 57 |
+
print(f"Error during download: {e}")
|
| 58 |
self.data_dir.mkdir(parents=True, exist_ok=True)
|
| 59 |
|
| 60 |
def upload_data(self):
|
|
|
|
| 62 |
try:
|
| 63 |
print("Uploading data to Hugging Face...")
|
| 64 |
|
| 65 |
+
if not self.data_dir.exists() or not any(self.data_dir.iterdir()):
|
| 66 |
+
print("No data to upload")
|
| 67 |
+
return
|
| 68 |
+
|
| 69 |
# Create temporary archive
|
| 70 |
with tempfile.NamedTemporaryFile(suffix='.tar.gz', delete=False) as tmp:
|
| 71 |
with tarfile.open(tmp.name, 'w:gz') as tar:
|
| 72 |
+
for item in self.data_dir.iterdir():
|
| 73 |
+
tar.add(item, arcname=item.name)
|
| 74 |
|
| 75 |
# Upload to HF
|
| 76 |
self.api.upload_file(
|
|
|
|
| 78 |
path_in_repo="data.tar.gz",
|
| 79 |
repo_id=self.repo_id,
|
| 80 |
repo_type="dataset",
|
| 81 |
+
commit_message="Update Open WebUI data",
|
| 82 |
+
token=self.token
|
| 83 |
)
|
| 84 |
|
| 85 |
# Clean up
|
|
|
|
| 97 |
token = os.getenv("HF_TOKEN")
|
| 98 |
data_dir = os.getenv("DATA_DIR", "/app/data")
|
| 99 |
|
| 100 |
+
if not token:
|
| 101 |
+
print("Warning: HF_TOKEN not set, operations may fail")
|
| 102 |
+
|
| 103 |
sync = HFStorageSync(repo_id, token, data_dir)
|
| 104 |
|
| 105 |
if len(sys.argv) > 1:
|