wwforonce committed on
Commit
5100142
·
1 Parent(s): d454b01

fix storage permission error

Browse files
Files changed (3) hide show
  1. Dockerfile +8 -3
  2. start_with_sync.sh +31 -10
  3. sync_storage.py +47 -22
Dockerfile CHANGED
@@ -14,6 +14,14 @@ RUN pip install --no-cache-dir \
14
  huggingface_hub \
15
  datasets
16
 
 
 
 
 
 
 
 
 
17
  # Copy sync scripts
18
  COPY sync_storage.py /app/sync_storage.py
19
  COPY start_with_sync.sh /start.sh
@@ -21,9 +29,6 @@ COPY start_with_sync.sh /start.sh
21
  # Make scripts executable
22
  RUN chmod +x /app/sync_storage.py /start.sh
23
 
24
- # Create data directory
25
- RUN mkdir -p /app/data
26
-
27
  # Set working directory
28
  WORKDIR /app
29
 
 
14
  huggingface_hub \
15
  datasets
16
 
17
+ # Create necessary directories with proper permissions
18
+ RUN mkdir -p /app/data /app/hf_cache /tmp/hf_cache && \
19
+ chmod -R 777 /app/data /app/hf_cache /tmp/hf_cache
20
+
21
+ # Set HuggingFace cache directory to writable location
22
+ ENV HF_HOME=/tmp/hf_cache
23
+ ENV HUGGINGFACE_HUB_CACHE=/tmp/hf_cache
24
+
25
  # Copy sync scripts
26
  COPY sync_storage.py /app/sync_storage.py
27
  COPY start_with_sync.sh /start.sh
 
29
  # Make scripts executable
30
  RUN chmod +x /app/sync_storage.py /start.sh
31
 
 
 
 
32
  # Set working directory
33
  WORKDIR /app
34
 
start_with_sync.sh CHANGED
@@ -7,12 +7,25 @@ export DATA_DIR="${DATA_DIR:-/app/data}"
7
  export HF_STORAGE_REPO="${HF_STORAGE_REPO:-your-username/open-webui-storage}"
8
  export SYNC_INTERVAL="${SYNC_INTERVAL:-300}" # 5 minutes
9
 
 
 
 
 
10
  echo "Starting Open WebUI with HF Dataset persistence..."
11
  echo "Data directory: $DATA_DIR"
12
  echo "HF Repository: $HF_STORAGE_REPO"
 
 
 
 
 
13
 
14
- # Create data directory
15
- mkdir -p "$DATA_DIR"
 
 
 
 
16
 
17
  # Download existing data on startup
18
  echo "Syncing data from Hugging Face..."
@@ -23,8 +36,10 @@ cleanup() {
23
  echo "Shutting down gracefully..."
24
 
25
  # Upload final data state
26
- echo "Uploading final data state..."
27
- python3 /app/sync_storage.py upload
 
 
28
 
29
  # Kill background processes
30
  kill $SYNC_PID 2>/dev/null || true
@@ -38,11 +53,18 @@ trap cleanup SIGTERM SIGINT
38
 
39
  # Background sync function
40
  background_sync() {
41
- while true; do
42
- sleep $SYNC_INTERVAL
43
- echo "Periodic sync to Hugging Face..."
44
- python3 /app/sync_storage.py upload
45
- done
 
 
 
 
 
 
 
46
  }
47
 
48
  # Start background sync
@@ -54,7 +76,6 @@ echo "Starting Open WebUI..."
54
 
55
  # Set environment variables for Open WebUI
56
  export WEBUI_SECRET_KEY="${WEBUI_SECRET_KEY:-$(openssl rand -hex 32)}"
57
- export DATA_DIR="$DATA_DIR"
58
 
59
  # Start Open WebUI in background
60
  /app/backend/start.sh &
 
7
  export HF_STORAGE_REPO="${HF_STORAGE_REPO:-your-username/open-webui-storage}"
8
  export SYNC_INTERVAL="${SYNC_INTERVAL:-300}" # 5 minutes
9
 
10
+ # Set HuggingFace cache to writable location
11
+ export HF_HOME="/tmp/hf_cache"
12
+ export HUGGINGFACE_HUB_CACHE="/tmp/hf_cache"
13
+
14
  echo "Starting Open WebUI with HF Dataset persistence..."
15
  echo "Data directory: $DATA_DIR"
16
  echo "HF Repository: $HF_STORAGE_REPO"
17
+ echo "HF Cache: $HF_HOME"
18
+
19
+ # Create directories with proper permissions
20
+ mkdir -p "$DATA_DIR" "$HF_HOME"
21
+ chmod -R 777 "$DATA_DIR" "$HF_HOME"
22
 
23
+ # Check if HF_TOKEN is set
24
+ if [ -z "$HF_TOKEN" ]; then
25
+ echo "Warning: HF_TOKEN not set. Sync functionality will be limited."
26
+ else
27
+ echo "HF_TOKEN is set, proceeding with sync..."
28
+ fi
29
 
30
  # Download existing data on startup
31
  echo "Syncing data from Hugging Face..."
 
36
  echo "Shutting down gracefully..."
37
 
38
  # Upload final data state
39
+ if [ -n "$HF_TOKEN" ]; then
40
+ echo "Uploading final data state..."
41
+ python3 /app/sync_storage.py upload
42
+ fi
43
 
44
  # Kill background processes
45
  kill $SYNC_PID 2>/dev/null || true
 
53
 
54
  # Background sync function
55
  background_sync() {
56
+ if [ -n "$HF_TOKEN" ]; then
57
+ while true; do
58
+ sleep $SYNC_INTERVAL
59
+ echo "Periodic sync to Hugging Face..."
60
+ python3 /app/sync_storage.py upload
61
+ done
62
+ else
63
+ echo "Skipping background sync - no HF_TOKEN"
64
+ while true; do
65
+ sleep 3600 # Just sleep if no token
66
+ done
67
+ fi
68
  }
69
 
70
  # Start background sync
 
76
 
77
  # Set environment variables for Open WebUI
78
  export WEBUI_SECRET_KEY="${WEBUI_SECRET_KEY:-$(openssl rand -hex 32)}"
 
79
 
80
  # Start Open WebUI in background
81
  /app/backend/start.sh &
sync_storage.py CHANGED
@@ -3,41 +3,58 @@ import os
3
  import shutil
4
  import json
5
  from pathlib import Path
6
- from huggingface_hub import HfApi, Repository, login
7
  import tarfile
8
  import tempfile
9
 
10
  class HFStorageSync:
11
  def __init__(self, repo_id, token=None, data_dir="/app/data"):
12
- self.repo_id = repo_id # e.g., "username/open-webui-storage"
13
  self.data_dir = Path(data_dir)
14
- self.api = HfApi()
15
 
16
- if token:
17
- login(token=token)
18
 
19
  def download_data(self):
20
  """Download and extract data from HF dataset repo"""
21
  try:
22
  print("Downloading data from Hugging Face...")
23
 
24
- # Download the data archive
25
- file_path = self.api.hf_hub_download(
26
- repo_id=self.repo_id,
27
- filename="data.tar.gz",
28
- repo_type="dataset"
29
- )
 
 
 
30
 
31
- # Extract to data directory
32
- self.data_dir.mkdir(parents=True, exist_ok=True)
33
-
34
- with tarfile.open(file_path, 'r:gz') as tar:
35
- tar.extractall(self.data_dir)
36
-
37
- print(f"Data extracted to {self.data_dir}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
  except Exception as e:
40
- print(f"No existing data found or error downloading: {e}")
41
  self.data_dir.mkdir(parents=True, exist_ok=True)
42
 
43
  def upload_data(self):
@@ -45,11 +62,15 @@ class HFStorageSync:
45
  try:
46
  print("Uploading data to Hugging Face...")
47
 
 
 
 
 
48
  # Create temporary archive
49
  with tempfile.NamedTemporaryFile(suffix='.tar.gz', delete=False) as tmp:
50
  with tarfile.open(tmp.name, 'w:gz') as tar:
51
- if self.data_dir.exists():
52
- tar.add(self.data_dir, arcname='.')
53
 
54
  # Upload to HF
55
  self.api.upload_file(
@@ -57,7 +78,8 @@ class HFStorageSync:
57
  path_in_repo="data.tar.gz",
58
  repo_id=self.repo_id,
59
  repo_type="dataset",
60
- commit_message="Update Open WebUI data"
 
61
  )
62
 
63
  # Clean up
@@ -75,6 +97,9 @@ def main():
75
  token = os.getenv("HF_TOKEN")
76
  data_dir = os.getenv("DATA_DIR", "/app/data")
77
 
 
 
 
78
  sync = HFStorageSync(repo_id, token, data_dir)
79
 
80
  if len(sys.argv) > 1:
 
3
  import shutil
4
  import json
5
  from pathlib import Path
6
+ from huggingface_hub import HfApi
7
  import tarfile
8
  import tempfile
9
 
10
  class HFStorageSync:
11
  def __init__(self, repo_id, token=None, data_dir="/app/data"):
12
+ self.repo_id = repo_id
13
  self.data_dir = Path(data_dir)
14
+ self.token = token
15
 
16
+ # Initialize API with token directly (avoid login() which saves to disk)
17
+ self.api = HfApi(token=token)
18
 
19
  def download_data(self):
20
  """Download and extract data from HF dataset repo"""
21
  try:
22
  print("Downloading data from Hugging Face...")
23
 
24
+ # Check if repo exists first
25
+ try:
26
+ repo_info = self.api.repo_info(repo_id=self.repo_id, repo_type="dataset")
27
+ print(f"Found repository: {self.repo_id}")
28
+ except Exception as e:
29
+ print(f"Repository {self.repo_id} not found or not accessible: {e}")
30
+ print("Creating empty data directory...")
31
+ self.data_dir.mkdir(parents=True, exist_ok=True)
32
+ return
33
 
34
+ # Try to download the data archive
35
+ try:
36
+ file_path = self.api.hf_hub_download(
37
+ repo_id=self.repo_id,
38
+ filename="data.tar.gz",
39
+ repo_type="dataset",
40
+ token=self.token
41
+ )
42
+
43
+ # Extract to data directory
44
+ self.data_dir.mkdir(parents=True, exist_ok=True)
45
+
46
+ with tarfile.open(file_path, 'r:gz') as tar:
47
+ tar.extractall(self.data_dir)
48
+
49
+ print(f"Data extracted to {self.data_dir}")
50
+
51
+ except Exception as e:
52
+ print(f"No data.tar.gz found in repository: {e}")
53
+ print("Starting with empty data directory...")
54
+ self.data_dir.mkdir(parents=True, exist_ok=True)
55
 
56
  except Exception as e:
57
+ print(f"Error during download: {e}")
58
  self.data_dir.mkdir(parents=True, exist_ok=True)
59
 
60
  def upload_data(self):
 
62
  try:
63
  print("Uploading data to Hugging Face...")
64
 
65
+ if not self.data_dir.exists() or not any(self.data_dir.iterdir()):
66
+ print("No data to upload")
67
+ return
68
+
69
  # Create temporary archive
70
  with tempfile.NamedTemporaryFile(suffix='.tar.gz', delete=False) as tmp:
71
  with tarfile.open(tmp.name, 'w:gz') as tar:
72
+ for item in self.data_dir.iterdir():
73
+ tar.add(item, arcname=item.name)
74
 
75
  # Upload to HF
76
  self.api.upload_file(
 
78
  path_in_repo="data.tar.gz",
79
  repo_id=self.repo_id,
80
  repo_type="dataset",
81
+ commit_message="Update Open WebUI data",
82
+ token=self.token
83
  )
84
 
85
  # Clean up
 
97
  token = os.getenv("HF_TOKEN")
98
  data_dir = os.getenv("DATA_DIR", "/app/data")
99
 
100
+ if not token:
101
+ print("Warning: HF_TOKEN not set, operations may fail")
102
+
103
  sync = HFStorageSync(repo_id, token, data_dir)
104
 
105
  if len(sys.argv) > 1: