thryyyyy committed on
Commit
aa04220
·
1 Parent(s): 83beded

more fixes

Browse files
Files changed (3) hide show
  1. backend/scripts/backup.py +113 -179
  2. backend/scripts/restore.py +63 -121
  3. backend/start.sh +15 -12
backend/scripts/backup.py CHANGED
@@ -5,149 +5,70 @@ import subprocess
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
 
8
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
9
 
10
- # Get the absolute paths for our script and base directories
11
- SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
12
- BASE_DIR = os.path.dirname(SCRIPT_DIR)
 
 
 
 
 
 
 
13
 
14
- # Define all paths as absolute paths relative to our base directory
15
- TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
16
- DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
17
- DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
18
 
19
  def ensure_directories():
20
  """
21
- Create and verify all necessary directories for our backup operation.
22
- Includes thorough permission checking and write verification.
23
 
24
  Returns:
25
- bool: True if directories were created and verified successfully
26
  """
27
  try:
28
- for directory in [os.path.dirname(DB_GPG_PATH), os.path.dirname(DB_FILE_PATH)]:
29
- # Create directory with full permissions for the current user
30
  os.makedirs(directory, mode=0o755, exist_ok=True)
31
-
32
- # Verify directory exists and get its current permissions
33
  dir_stat = os.stat(directory)
34
- print(f"Directory {directory} created with permissions: {oct(dir_stat.st_mode)[-3:]}")
35
 
36
- # Verify we can write to the directory with a test file
37
  test_file = os.path.join(directory, '.write_test')
38
- try:
39
- with open(test_file, 'w') as f:
40
- f.write('test')
41
- os.remove(test_file)
42
- print(f"Successfully verified write access to {directory}")
43
- except Exception as e:
44
- print(f"Directory {directory} is not writable: {e}")
45
- print(f"Current process user ID: {os.getuid()}")
46
- print(f"Directory owner ID: {dir_stat.st_uid}")
47
- return False
48
-
49
- return True
50
- except Exception as e:
51
- print(f"Error in directory creation/verification: {e}")
52
- print(f"Current process user ID: {os.getuid()}")
53
- print(f"Current process group ID: {os.getgid()}")
54
- print(f"Current working directory: {os.getcwd()}")
55
- return False
56
-
57
- def get_last_backup_time(repo_id, hf_token):
58
- """
59
- Retrieve the timestamp of the last backup from HuggingFace Space.
60
-
61
- Args:
62
- repo_id (str): The HuggingFace Space ID
63
- hf_token (str): The HuggingFace API token
64
-
65
- Returns:
66
- datetime or None: The timestamp of the last backup, or None if not found
67
- """
68
- api = HfApi()
69
- try:
70
- # Check if the timestamp file exists in the repository
71
- files = api.list_repo_files(
72
- repo_id=repo_id,
73
- repo_type="space",
74
- token=hf_token
75
- )
76
-
77
- relative_timestamp_path = os.path.relpath(TIMESTAMP_FILE_PATH, BASE_DIR)
78
- if relative_timestamp_path not in files:
79
- print(f"No timestamp file found in repository")
80
- return None
81
-
82
- # Download and parse the timestamp file
83
- temp_file = hf_hub_download(
84
- repo_id=repo_id,
85
- repo_type="space",
86
- filename=relative_timestamp_path,
87
- token=hf_token
88
- )
89
-
90
- with open(temp_file, "r", encoding="utf-8") as f:
91
- timestamp_str = f.read().strip()
92
- return datetime.datetime.fromisoformat(timestamp_str)
93
-
94
- except Exception as e:
95
- print(f"Error getting last backup time: {e}")
96
- return None
97
-
98
- def save_timestamp_locally():
99
- """
100
- Save the current UTC time as our backup timestamp.
101
- Includes error handling and directory creation.
102
-
103
- Returns:
104
- bool: True if timestamp was saved successfully
105
- """
106
- try:
107
- # Use UTC time with timezone information
108
- now = datetime.datetime.now(datetime.timezone.utc)
109
-
110
- # Ensure the directory exists
111
- os.makedirs(os.path.dirname(TIMESTAMP_FILE_PATH), mode=0o755, exist_ok=True)
112
-
113
- # Write the timestamp atomically using a temporary file
114
- temp_path = f"{TIMESTAMP_FILE_PATH}.tmp"
115
- with open(temp_path, "w", encoding="utf-8") as f:
116
- f.write(now.isoformat())
117
- os.replace(temp_path, TIMESTAMP_FILE_PATH)
118
-
119
  return True
120
  except Exception as e:
121
- print(f"Error saving timestamp: {e}")
122
  return False
123
 
124
  def verify_database():
125
  """
126
- Comprehensive verification of the database's existence and integrity.
127
- Uses SQLite's built-in integrity check and schema validation.
128
-
129
- Returns:
130
- bool: True if database is valid and healthy
131
  """
132
  if not os.path.exists(DB_FILE_PATH):
133
  print(f"Database file not found at: {DB_FILE_PATH}")
134
  return False
135
 
136
  try:
137
- # Get file information for debugging
138
  file_stat = os.stat(DB_FILE_PATH)
139
  print(f"Database file size: {file_stat.st_size:,} bytes")
140
  print(f"Database file permissions: {oct(file_stat.st_mode)[-3:]}")
141
 
142
- # Verify database integrity
143
  with sqlite3.connect(DB_FILE_PATH) as conn:
144
  cursor = conn.cursor()
145
-
146
- # Run comprehensive integrity check
147
  cursor.execute("PRAGMA integrity_check;")
148
  result = cursor.fetchone()[0]
149
-
150
- # Verify schema structure
151
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
152
  tables = cursor.fetchall()
153
 
@@ -162,55 +83,29 @@ def verify_database():
162
  if len(tables) == 0:
163
  print("No tables found in database")
164
  return False
165
-
166
  except sqlite3.Error as e:
167
  print(f"SQLite error during verification: {e}")
168
  return False
169
  except Exception as e:
170
  print(f"Unexpected error during verification: {e}")
171
- print(f"Error type: {type(e).__name__}")
172
  return False
173
 
174
  def encrypt_database(passphrase):
175
  """
176
- Encrypt the database file using GPG with robust directory handling.
 
 
 
 
177
  """
178
  try:
179
  print("\nPreparing for database encryption...")
180
 
181
- # Get absolute paths
182
- target_dir = os.path.dirname(DB_GPG_PATH)
183
-
184
- # Create directory with explicit permissions right before encryption
185
- print(f"Ensuring target directory exists: {target_dir}")
186
- os.makedirs(target_dir, mode=0o755, exist_ok=True)
187
-
188
- # Verify directory was created
189
- if not os.path.isdir(target_dir):
190
- print(f"Failed to create directory: {target_dir}")
191
- return False
192
-
193
- # Get and display directory permissions
194
- dir_stat = os.stat(target_dir)
195
- print(f"Directory permissions: {oct(dir_stat.st_mode)[-3:]}")
196
- print(f"Directory owner: {dir_stat.st_uid}")
197
-
198
- # Try to create a test file to verify write access
199
- test_file = os.path.join(target_dir, '.gpg_test')
200
- try:
201
- with open(test_file, 'w') as f:
202
- f.write('test')
203
- os.remove(test_file)
204
- print("Write test successful")
205
- except Exception as e:
206
- print(f"Write test failed: {e}")
207
- return False
208
-
209
- # Create .gnupg directory with explicit permissions
210
  gnupg_dir = '/root/.gnupg'
211
  os.makedirs(gnupg_dir, mode=0o700, exist_ok=True)
212
 
213
- # Prepare GPG command
214
  encrypt_cmd = [
215
  "gpg",
216
  "--batch",
@@ -222,106 +117,146 @@ def encrypt_database(passphrase):
222
  DB_FILE_PATH
223
  ]
224
 
225
- print("\nRunning GPG encryption...")
226
  result = subprocess.run(
227
  encrypt_cmd,
228
  capture_output=True,
229
- text=True,
230
- check=False
231
  )
232
 
233
  if result.returncode != 0:
234
  print(f"GPG encryption failed with code {result.returncode}")
235
- print(f"Error output: {result.stderr}")
236
  return False
237
 
238
- # Verify the encrypted file was created
239
  if os.path.exists(DB_GPG_PATH):
240
  encrypted_size = os.path.getsize(DB_GPG_PATH)
241
  print(f"Encryption successful. Encrypted file size: {encrypted_size:,} bytes")
242
  return True
243
  else:
244
- print("GPG reported success but file not found")
245
  return False
246
-
247
  except Exception as e:
248
  print(f"Encryption failed with exception: {e}")
249
  return False
250
 
251
- def backup_db():
252
  """
253
- Main function to handle the complete database backup process.
254
- Includes environment validation, threshold checking, encryption, and upload.
255
 
256
  Returns:
257
- bool: True if backup completed successfully
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  """
259
- # Validate environment variables
260
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
261
  hf_token = os.environ.get("HF_TOKEN")
262
  space_id = os.environ.get("SPACE_ID")
263
 
264
  if not all([passphrase, hf_token, space_id]):
265
- print("Error: Missing required environment variables")
266
  return False
267
-
268
- # Ensure directory structure exists
269
  if not ensure_directories():
270
  print("Failed to create or verify directories")
271
  return False
272
 
273
- # Get backup threshold setting
274
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
275
-
276
- # Check if backup is needed based on threshold
277
  if threshold_minutes > 0:
278
  last_backup_dt = get_last_backup_time(space_id, hf_token)
279
  if last_backup_dt is not None:
280
  now = datetime.datetime.now(datetime.timezone.utc)
 
281
  if not last_backup_dt.tzinfo:
282
  last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
283
  elapsed = now - last_backup_dt
284
  if elapsed.total_seconds() < threshold_minutes * 60:
285
  print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago")
286
  print(f"Threshold is {threshold_minutes} minutes")
287
- print("Skipping backup to avoid rebuild loop")
288
  return True
289
  else:
290
  print("Backup threshold check disabled for testing")
291
-
292
- # Verify database integrity
293
  if not verify_database():
294
  print("Database verification failed, aborting backup")
295
  return False
296
-
297
  # Encrypt the database
298
  if not encrypt_database(passphrase):
299
  print("Database encryption failed, aborting backup")
300
  return False
301
-
302
- # Save the backup timestamp
303
  if not save_timestamp_locally():
304
- print("Failed to save timestamp")
305
- return False
306
-
307
- # Upload to HuggingFace Spaces
308
- print("\nUploading to Hugging Face Spaces...")
309
- api = HfApi()
310
 
 
 
311
  try:
312
- # Prepare file operations
313
  operations = [
 
314
  CommitOperationAdd(
315
- path_in_repo=os.path.relpath(DB_GPG_PATH, BASE_DIR),
316
  path_or_fileobj=DB_GPG_PATH
317
  ),
 
318
  CommitOperationAdd(
319
- path_in_repo=os.path.relpath(TIMESTAMP_FILE_PATH, BASE_DIR),
320
  path_or_fileobj=TIMESTAMP_FILE_PATH
321
  )
322
  ]
323
-
324
- # Create commit with both files
325
  api.create_commit(
326
  repo_id=space_id,
327
  repo_type="space",
@@ -331,9 +266,8 @@ def backup_db():
331
  )
332
  print("Backup files uploaded successfully!")
333
  return True
334
-
335
  except Exception as e:
336
- print(f"Error uploading to HuggingFace: {e}")
337
  return False
338
 
339
  if __name__ == "__main__":
 
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
8
+
9
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
10
 
11
# ------------------------------------------------------------------------------
# Storage locations. Everything lives under /tmp/open_webui so the scripts keep
# working even when /app is mounted read-only (e.g. on Hugging Face Spaces).
# ------------------------------------------------------------------------------
BACKUP_DIR = "/tmp/open_webui/db_backup"
DATA_DIR = "/tmp/open_webui/data"

# Concrete files on the local (runtime) filesystem.
TIMESTAMP_FILE_PATH = os.path.join(BACKUP_DIR, "last_backup_time.txt")
DB_GPG_PATH = os.path.join(BACKUP_DIR, "webui.db.gpg")
DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")

# Repository-relative paths used when committing to the Hugging Face Space,
# so uploads land under "db_backup/" in the repo.
REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
26
 
27
def ensure_directories(directories=None):
    """
    Create and verify all necessary directories for the backup operation.

    By default this operates on the module-level BACKUP_DIR and DATA_DIR
    (under /tmp/open_webui rather than /app, to avoid read-only file-system
    issues), but an explicit list of directories may be supplied.

    Args:
        directories (list[str] | None): Directories to create and verify.
            Defaults to [BACKUP_DIR, DATA_DIR].

    Returns:
        bool: True if every directory exists and is writable, False otherwise.
    """
    if directories is None:
        directories = [BACKUP_DIR, DATA_DIR]
    try:
        for directory in directories:
            os.makedirs(directory, mode=0o755, exist_ok=True)

            dir_stat = os.stat(directory)
            print(f"Directory {directory} created or exists with permissions: {oct(dir_stat.st_mode)[-3:]}")

            # Prove the directory is actually writable by round-tripping a
            # scratch file; any failure raises and is reported below.
            test_file = os.path.join(directory, '.write_test')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            print(f"Successfully verified write access to {directory}")

        return True
    except Exception as e:
        print(f"Error creating/verifying directories: {e}")
        return False
52
 
53
  def verify_database():
54
  """
55
+ Verify the database exists and passes a basic SQLite integrity check.
 
 
 
 
56
  """
57
  if not os.path.exists(DB_FILE_PATH):
58
  print(f"Database file not found at: {DB_FILE_PATH}")
59
  return False
60
 
61
  try:
62
+ # Print file info for debugging
63
  file_stat = os.stat(DB_FILE_PATH)
64
  print(f"Database file size: {file_stat.st_size:,} bytes")
65
  print(f"Database file permissions: {oct(file_stat.st_mode)[-3:]}")
66
 
67
+ # Run SQLite integrity check
68
  with sqlite3.connect(DB_FILE_PATH) as conn:
69
  cursor = conn.cursor()
 
 
70
  cursor.execute("PRAGMA integrity_check;")
71
  result = cursor.fetchone()[0]
 
 
72
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
73
  tables = cursor.fetchall()
74
 
 
83
  if len(tables) == 0:
84
  print("No tables found in database")
85
  return False
 
86
  except sqlite3.Error as e:
87
  print(f"SQLite error during verification: {e}")
88
  return False
89
  except Exception as e:
90
  print(f"Unexpected error during verification: {e}")
 
91
  return False
92
 
93
  def encrypt_database(passphrase):
94
  """
95
+ Encrypt the database file using GPG (AES256), writing the .gpg file
96
+ to /tmp/open_webui/db_backup.
97
+
98
+ Returns:
99
+ bool: True if successful, False otherwise
100
  """
101
  try:
102
  print("\nPreparing for database encryption...")
103
 
104
+ # Ensure GPG home directory is created with correct perms
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  gnupg_dir = '/root/.gnupg'
106
  os.makedirs(gnupg_dir, mode=0o700, exist_ok=True)
107
 
108
+ print("\nRunning GPG encryption...")
109
  encrypt_cmd = [
110
  "gpg",
111
  "--batch",
 
117
  DB_FILE_PATH
118
  ]
119
 
 
120
  result = subprocess.run(
121
  encrypt_cmd,
122
  capture_output=True,
123
+ text=True
 
124
  )
125
 
126
  if result.returncode != 0:
127
  print(f"GPG encryption failed with code {result.returncode}")
128
+ print(f"GPG stderr: {result.stderr}")
129
  return False
130
 
 
131
  if os.path.exists(DB_GPG_PATH):
132
  encrypted_size = os.path.getsize(DB_GPG_PATH)
133
  print(f"Encryption successful. Encrypted file size: {encrypted_size:,} bytes")
134
  return True
135
  else:
136
+ print("GPG reported success but the .gpg file was not found.")
137
  return False
 
138
  except Exception as e:
139
  print(f"Encryption failed with exception: {e}")
140
  return False
141
 
142
def get_last_backup_time(repo_id, hf_token):
    """
    Look up the timestamp of the most recent backup stored in the Space.

    Args:
        repo_id: The Hugging Face Space id.
        hf_token: API token used for repository access.

    Returns:
        datetime parsed from the stored ISO-8601 string, or None when the
        timestamp file is absent or any error occurs.
    """
    try:
        repo_files = HfApi().list_repo_files(
            repo_id=repo_id, repo_type="space", token=hf_token
        )
        if REPO_TIMESTAMP_FILE not in repo_files:
            print("No timestamp file found in repository")
            return None

        # Fetch the timestamp file to a local cache path, then parse it.
        local_copy = hf_hub_download(
            repo_id=repo_id,
            repo_type="space",
            filename=REPO_TIMESTAMP_FILE,
            token=hf_token
        )
        with open(local_copy, "r", encoding="utf-8") as handle:
            raw_timestamp = handle.read().strip()

        return datetime.datetime.fromisoformat(raw_timestamp)
    except Exception as e:
        print(f"Error getting last backup time: {e}")
        return None
171
+
172
def save_timestamp_locally(path=None):
    """
    Atomically save the current UTC time (ISO-8601) to a local timestamp file.

    Args:
        path (str | None): Destination file. Defaults to the module-level
            TIMESTAMP_FILE_PATH.

    Returns:
        bool: True on success, False on any error (which is printed).
    """
    if path is None:
        path = TIMESTAMP_FILE_PATH
    try:
        # Timezone-aware "now" so the stored value is unambiguous when read back.
        now = datetime.datetime.now(datetime.timezone.utc)
        os.makedirs(os.path.dirname(path), exist_ok=True)

        # Write-then-rename keeps the update atomic: readers never observe a
        # partially written timestamp file.
        temp_path = f"{path}.tmp"
        with open(temp_path, "w", encoding="utf-8") as f:
            f.write(now.isoformat())
        os.replace(temp_path, path)

        return True
    except Exception as e:
        print(f"Error saving timestamp: {e}")
        return False
189
+
190
+ def backup_db():
191
+ """
192
+ Main entry point for performing a backup:
193
+ 1. Ensure directories are writable
194
+ 2. Check if backup is needed (threshold)
195
+ 3. Verify DB
196
+ 4. Encrypt DB
197
+ 5. Save new timestamp
198
+ 6. Upload to HF Space
199
  """
 
200
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
201
  hf_token = os.environ.get("HF_TOKEN")
202
  space_id = os.environ.get("SPACE_ID")
203
 
204
  if not all([passphrase, hf_token, space_id]):
205
+ print("Error: Missing one of BACKUP_PASSPHRASE, HF_TOKEN, SPACE_ID")
206
  return False
207
+
 
208
  if not ensure_directories():
209
  print("Failed to create or verify directories")
210
  return False
211
 
 
212
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
 
 
213
  if threshold_minutes > 0:
214
  last_backup_dt = get_last_backup_time(space_id, hf_token)
215
  if last_backup_dt is not None:
216
  now = datetime.datetime.now(datetime.timezone.utc)
217
+ # ensure last_backup_dt has a tz
218
  if not last_backup_dt.tzinfo:
219
  last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
220
  elapsed = now - last_backup_dt
221
  if elapsed.total_seconds() < threshold_minutes * 60:
222
  print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago")
223
  print(f"Threshold is {threshold_minutes} minutes")
224
+ print("Skipping backup to avoid rebuilding too often")
225
  return True
226
  else:
227
  print("Backup threshold check disabled for testing")
228
+
229
+ # Verify the local database
230
  if not verify_database():
231
  print("Database verification failed, aborting backup")
232
  return False
233
+
234
  # Encrypt the database
235
  if not encrypt_database(passphrase):
236
  print("Database encryption failed, aborting backup")
237
  return False
238
+
239
+ # Save timestamp to local file
240
  if not save_timestamp_locally():
241
+ print("Failed to save timestamp locally, but continuing to upload anyway...")
 
 
 
 
 
242
 
243
+ # Upload to Hugging Face
244
+ print("\nUploading to Hugging Face Spaces...")
245
  try:
246
+ api = HfApi()
247
  operations = [
248
+ # Add the .gpg file
249
  CommitOperationAdd(
250
+ path_in_repo=REPO_DB_GPG_FILE,
251
  path_or_fileobj=DB_GPG_PATH
252
  ),
253
+ # Add the timestamp file
254
  CommitOperationAdd(
255
+ path_in_repo=REPO_TIMESTAMP_FILE,
256
  path_or_fileobj=TIMESTAMP_FILE_PATH
257
  )
258
  ]
259
+
 
260
  api.create_commit(
261
  repo_id=space_id,
262
  repo_type="space",
 
266
  )
267
  print("Backup files uploaded successfully!")
268
  return True
 
269
  except Exception as e:
270
+ print(f"Error uploading to Hugging Face: {e}")
271
  return False
272
 
273
  if __name__ == "__main__":
backend/scripts/restore.py CHANGED
@@ -5,29 +5,29 @@ import subprocess
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
 
8
  from huggingface_hub import HfApi, hf_hub_download
9
 
10
- # Just like in backup.py, we establish absolute paths based on the script's location
11
- # This ensures consistent path handling regardless of where the script is called from
12
- SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
13
- BASE_DIR = os.path.dirname(SCRIPT_DIR)
 
 
 
 
 
 
14
 
15
- # Define all paths as absolute paths relative to our base directory
16
- # This maintains consistency with backup.py and prevents path-related issues
17
- TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
18
- DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
19
- DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
20
 
21
  def check_requirements():
22
  """
23
  Verify that GPG is installed and available in the system.
24
- GPG is essential for decrypting the database backup.
25
-
26
- Returns:
27
- bool: True if GPG is available, False otherwise
28
  """
29
  try:
30
- # Test GPG availability by checking its version
31
  subprocess.run(["gpg", "--version"], check=True, capture_output=True)
32
  return True
33
  except (subprocess.CalledProcessError, FileNotFoundError):
@@ -36,11 +36,7 @@ def check_requirements():
36
 
37
  def validate_secrets():
38
  """
39
- Ensure all required environment variables are set.
40
- These variables are essential for accessing and decrypting the backup.
41
-
42
- Returns:
43
- bool: True if all required variables are set, False otherwise
44
  """
45
  required_vars = ["BACKUP_PASSPHRASE", "HF_TOKEN", "SPACE_ID"]
46
  missing = [var for var in required_vars if not os.environ.get(var)]
@@ -52,15 +48,10 @@ def validate_secrets():
52
 
53
  def ensure_directories():
54
  """
55
- Create necessary directories for database and backup files.
56
- This ensures we have the proper directory structure before any operations.
57
-
58
- Returns:
59
- bool: True if directories are created successfully, False otherwise
60
  """
61
  try:
62
- # Create both backup and database directories with appropriate permissions
63
- for directory in [os.path.dirname(DB_FILE_PATH), os.path.dirname(DB_GPG_PATH)]:
64
  os.makedirs(directory, mode=0o755, exist_ok=True)
65
  return True
66
  except Exception as e:
@@ -69,108 +60,78 @@ def ensure_directories():
69
 
70
  def get_latest_backup_info(repo_id, hf_token):
71
  """
72
- Check for and retrieve information about the latest backup from HuggingFace.
73
-
74
- Args:
75
- repo_id (str): The HuggingFace Space ID
76
- hf_token (str): The HuggingFace API token
77
-
78
- Returns:
79
- tuple: (backup_exists: bool, timestamp: datetime or None)
80
  """
81
  api = HfApi()
82
  try:
83
- # Check if backup file exists in the repository
84
- files = api.list_repo_files(
85
- repo_id=repo_id,
86
- repo_type="space",
87
- token=hf_token
88
- )
89
-
90
- # Look for backup file using relative path
91
- relative_backup_path = os.path.relpath(DB_GPG_PATH, BASE_DIR)
92
- backup_exists = relative_backup_path in files
93
 
 
 
94
  if not backup_exists:
95
  print("No backup file found in the repository")
96
  return False, None
97
-
98
- # If backup exists, try to get its timestamp
99
  try:
100
- relative_timestamp_path = os.path.relpath(TIMESTAMP_FILE_PATH, BASE_DIR)
101
- timestamp_file = hf_hub_download(
102
- repo_id=repo_id,
103
- repo_type="space",
104
- filename=relative_timestamp_path,
105
- token=hf_token
106
- )
107
-
108
- with open(timestamp_file, "r", encoding="utf-8") as f:
109
- timestamp = datetime.datetime.fromisoformat(f.read().strip())
110
  print(f"Found backup from: {timestamp} UTC")
111
  return True, timestamp
112
-
 
 
113
  except Exception as e:
114
- print(f"Note: Could not read timestamp (this is okay for first run): {e}")
115
  return True, None
116
-
117
  except Exception as e:
118
  print(f"Error checking repository: {e}")
119
- print("For debugging - trying to list repository contents:")
120
- try:
121
- files = api.list_repo_files(repo_id=repo_id, repo_type="space", token=hf_token)
122
- print(f"Files in repository: {files}")
123
- except Exception as debug_e:
124
- print(f"Debug listing failed: {debug_e}")
125
  return False, None
126
 
127
  def download_backup(repo_id, hf_token):
128
  """
129
- Download the encrypted database backup from HuggingFace.
130
-
131
- Args:
132
- repo_id (str): The HuggingFace Space ID
133
- hf_token (str): The HuggingFace API token
134
-
135
- Returns:
136
- bool: True if download is successful, False otherwise
137
  """
138
  try:
139
  print("Downloading encrypted database backup...")
140
- relative_backup_path = os.path.relpath(DB_GPG_PATH, BASE_DIR)
141
  temp_file = hf_hub_download(
142
  repo_id=repo_id,
143
  repo_type="space",
144
- filename=relative_backup_path,
145
  token=hf_token
146
  )
147
 
148
- # Move the downloaded file to the correct location
149
  os.makedirs(os.path.dirname(DB_GPG_PATH), exist_ok=True)
150
  os.replace(temp_file, DB_GPG_PATH)
151
  print("Backup downloaded successfully")
152
  return True
153
-
154
  except Exception as e:
155
  print(f"Error downloading backup: {e}")
156
  return False
157
 
158
  def decrypt_database(passphrase):
159
  """
160
- Decrypt the downloaded database file using GPG.
161
-
162
- Args:
163
- passphrase (str): The passphrase for decrypting the database
164
-
165
- Returns:
166
- bool: True if decryption is successful or no backup exists, False on error
167
  """
168
  if not os.path.exists(DB_GPG_PATH):
169
- print("No encrypted backup found locally")
170
- return True # Not an error, might be first run
171
-
172
  try:
173
  print("Decrypting database with GPG...")
 
 
 
 
 
174
  decrypt_cmd = [
175
  "gpg",
176
  "--batch",
@@ -180,37 +141,29 @@ def decrypt_database(passphrase):
180
  "-o", DB_FILE_PATH,
181
  DB_GPG_PATH
182
  ]
183
-
184
  subprocess.run(decrypt_cmd, check=True, stderr=subprocess.PIPE)
185
  print(f"Database decrypted successfully to {DB_FILE_PATH}")
186
  return True
187
-
188
  except subprocess.CalledProcessError as e:
189
  print(f"Failed to decrypt database: {e}")
190
- print(f"GPG error output: {e.stderr.decode()}")
 
191
  return False
192
 
193
  def verify_database():
194
  """
195
- Verify the integrity of the restored database using SQLite's built-in checks.
196
- This function uses Python's sqlite3 module instead of the command-line tool.
197
-
198
- Returns:
199
- bool: True if database is valid or doesn't exist, False if corrupted
200
  """
201
  if not os.path.exists(DB_FILE_PATH):
202
- return True # Not an error, might be first run
203
-
 
204
  try:
205
  print("Verifying database integrity...")
206
  with sqlite3.connect(DB_FILE_PATH) as conn:
207
  cursor = conn.cursor()
208
-
209
- # Run integrity check
210
  cursor.execute("PRAGMA integrity_check;")
211
  result = cursor.fetchone()[0]
212
-
213
- # Verify basic schema structure
214
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
215
  tables = cursor.fetchall()
216
 
@@ -225,7 +178,6 @@ def verify_database():
225
  if len(tables) == 0:
226
  print("No tables found in database")
227
  return False
228
-
229
  except sqlite3.Error as e:
230
  print(f"Database verification failed: {e}")
231
  return False
@@ -235,33 +187,23 @@ def verify_database():
235
 
236
  def restore_db():
237
  """
238
- Main function to handle the database restoration process.
239
- This function orchestrates the complete restore operation, including:
240
- 1. Environment validation
241
- 2. Directory creation
242
- 3. Backup download
243
- 4. Decryption
244
- 5. Database verification
245
-
246
- Returns:
247
- bool: True if restore is successful or no backup needed, False on error
248
  """
249
- # Check requirements and environment
250
  if not check_requirements() or not validate_secrets():
251
  return False
252
 
253
- # Ensure required directories exist
254
  if not ensure_directories():
255
  return False
256
 
257
- # Get environment variables
258
  passphrase = os.environ["BACKUP_PASSPHRASE"]
259
  hf_token = os.environ["HF_TOKEN"]
260
  space_id = os.environ["SPACE_ID"]
261
 
262
- # Check if there's a backup in the repository
263
  backup_exists, timestamp = get_latest_backup_info(space_id, hf_token)
264
-
265
  if backup_exists:
266
  if not download_backup(space_id, hf_token):
267
  print("Failed to download backup")
@@ -270,15 +212,15 @@ def restore_db():
270
  if not decrypt_database(passphrase):
271
  print("Failed to decrypt database")
272
  return False
273
-
274
  if not verify_database():
275
- print("Failed to verify database integrity")
276
- # Remove potentially corrupted database
277
  if os.path.exists(DB_FILE_PATH):
278
  os.unlink(DB_FILE_PATH)
279
  return False
280
  else:
281
- print("No backup found - starting with fresh database")
282
 
283
  print("Database restore completed successfully!")
284
  return True
 
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
8
+
9
  from huggingface_hub import HfApi, hf_hub_download
10
 
11
+ # ------------------------------------------------------------------------------
12
+ # 1. Define directories outside of /app to avoid read-only file system issues
13
+ # ------------------------------------------------------------------------------
14
+ RESTORE_BACKUP_DIR = "/tmp/open_webui/db_backup"
15
+ RESTORE_DATA_DIR = "/tmp/open_webui/data"
16
+
17
+ # Actual paths on the local (runtime) filesystem
18
+ TIMESTAMP_FILE_PATH = os.path.join(RESTORE_BACKUP_DIR, "last_backup_time.txt")
19
+ DB_GPG_PATH = os.path.join(RESTORE_BACKUP_DIR, "webui.db.gpg")
20
+ DB_FILE_PATH = os.path.join(RESTORE_DATA_DIR, "webui.db")
21
 
22
+ # Paths used in the Hugging Face Space repository
23
+ REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
24
+ REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
 
 
25
 
26
  def check_requirements():
27
  """
28
  Verify that GPG is installed and available in the system.
 
 
 
 
29
  """
30
  try:
 
31
  subprocess.run(["gpg", "--version"], check=True, capture_output=True)
32
  return True
33
  except (subprocess.CalledProcessError, FileNotFoundError):
 
36
 
37
  def validate_secrets():
38
  """
39
+ Ensure all required environment variables are set: BACKUP_PASSPHRASE, HF_TOKEN, SPACE_ID.
 
 
 
 
40
  """
41
  required_vars = ["BACKUP_PASSPHRASE", "HF_TOKEN", "SPACE_ID"]
42
  missing = [var for var in required_vars if not os.environ.get(var)]
 
48
 
49
  def ensure_directories():
50
  """
51
+ Create necessary directories for database and backup files (in /tmp).
 
 
 
 
52
  """
53
  try:
54
+ for directory in [RESTORE_DATA_DIR, RESTORE_BACKUP_DIR]:
 
55
  os.makedirs(directory, mode=0o755, exist_ok=True)
56
  return True
57
  except Exception as e:
 
60
 
61
  def get_latest_backup_info(repo_id, hf_token):
62
  """
63
+ Check if a backup exists in the HF Space. If so, return True and a timestamp (or None).
 
 
 
 
 
 
 
64
  """
65
  api = HfApi()
66
  try:
67
+ files = api.list_repo_files(repo_id=repo_id, repo_type="space", token=hf_token)
 
 
 
 
 
 
 
 
 
68
 
69
+ # Check if the encrypted DB is present
70
+ backup_exists = (REPO_DB_GPG_FILE in files)
71
  if not backup_exists:
72
  print("No backup file found in the repository")
73
  return False, None
74
+
75
+ # Attempt to fetch the timestamp file
76
  try:
77
+ if REPO_TIMESTAMP_FILE in files:
78
+ timestamp_file = hf_hub_download(
79
+ repo_id=repo_id,
80
+ repo_type="space",
81
+ filename=REPO_TIMESTAMP_FILE,
82
+ token=hf_token
83
+ )
84
+ with open(timestamp_file, "r", encoding="utf-8") as f:
85
+ timestamp = datetime.datetime.fromisoformat(f.read().strip())
 
86
  print(f"Found backup from: {timestamp} UTC")
87
  return True, timestamp
88
+ else:
89
+ print("No timestamp file found, but backup file exists")
90
+ return True, None
91
  except Exception as e:
92
+ print(f"Could not read timestamp (possibly first run): {e}")
93
  return True, None
 
94
  except Exception as e:
95
  print(f"Error checking repository: {e}")
 
 
 
 
 
 
96
  return False, None
97
 
98
  def download_backup(repo_id, hf_token):
99
  """
100
+ Download the encrypted database backup from Hugging Face into /tmp/open_webui/db_backup.
 
 
 
 
 
 
 
101
  """
102
  try:
103
  print("Downloading encrypted database backup...")
 
104
  temp_file = hf_hub_download(
105
  repo_id=repo_id,
106
  repo_type="space",
107
+ filename=REPO_DB_GPG_FILE,
108
  token=hf_token
109
  )
110
 
111
+ # Move the downloaded file to DB_GPG_PATH
112
  os.makedirs(os.path.dirname(DB_GPG_PATH), exist_ok=True)
113
  os.replace(temp_file, DB_GPG_PATH)
114
  print("Backup downloaded successfully")
115
  return True
 
116
  except Exception as e:
117
  print(f"Error downloading backup: {e}")
118
  return False
119
 
120
  def decrypt_database(passphrase):
121
  """
122
+ Decrypt the database file using GPG, writing the decrypted DB into /tmp/open_webui/data.
 
 
 
 
 
 
123
  """
124
  if not os.path.exists(DB_GPG_PATH):
125
+ print("No encrypted backup found locally. Proceeding with fresh DB.")
126
+ return True # Not necessarily an error
127
+
128
  try:
129
  print("Decrypting database with GPG...")
130
+
131
+ # Ensure /root/.gnupg is set up
132
+ gnupg_dir = "/root/.gnupg"
133
+ os.makedirs(gnupg_dir, mode=0o700, exist_ok=True)
134
+
135
  decrypt_cmd = [
136
  "gpg",
137
  "--batch",
 
141
  "-o", DB_FILE_PATH,
142
  DB_GPG_PATH
143
  ]
 
144
  subprocess.run(decrypt_cmd, check=True, stderr=subprocess.PIPE)
145
  print(f"Database decrypted successfully to {DB_FILE_PATH}")
146
  return True
 
147
  except subprocess.CalledProcessError as e:
148
  print(f"Failed to decrypt database: {e}")
149
+ if e.stderr:
150
+ print(f"GPG error output: {e.stderr.decode(errors='ignore')}")
151
  return False
152
 
153
  def verify_database():
154
  """
155
+ Verify the integrity of the restored database using SQLite's built-in integrity check.
 
 
 
 
156
  """
157
  if not os.path.exists(DB_FILE_PATH):
158
+ # If there's no DB yet, that's okay (fresh start)
159
+ return True
160
+
161
  try:
162
  print("Verifying database integrity...")
163
  with sqlite3.connect(DB_FILE_PATH) as conn:
164
  cursor = conn.cursor()
 
 
165
  cursor.execute("PRAGMA integrity_check;")
166
  result = cursor.fetchone()[0]
 
 
167
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
168
  tables = cursor.fetchall()
169
 
 
178
  if len(tables) == 0:
179
  print("No tables found in database")
180
  return False
 
181
  except sqlite3.Error as e:
182
  print(f"Database verification failed: {e}")
183
  return False
 
187
 
188
  def restore_db():
189
  """
190
+ Main restore function:
191
+ 1. Check GPG installation & environment variables
192
+ 2. Create directories
193
+ 3. Check if remote backup exists, download & decrypt
194
+ 4. Verify DB
 
 
 
 
 
195
  """
 
196
  if not check_requirements() or not validate_secrets():
197
  return False
198
 
 
199
  if not ensure_directories():
200
  return False
201
 
 
202
  passphrase = os.environ["BACKUP_PASSPHRASE"]
203
  hf_token = os.environ["HF_TOKEN"]
204
  space_id = os.environ["SPACE_ID"]
205
 
 
206
  backup_exists, timestamp = get_latest_backup_info(space_id, hf_token)
 
207
  if backup_exists:
208
  if not download_backup(space_id, hf_token):
209
  print("Failed to download backup")
 
212
  if not decrypt_database(passphrase):
213
  print("Failed to decrypt database")
214
  return False
215
+
216
  if not verify_database():
217
+ print("Database integrity verification failed")
218
+ # Remove the corrupted DB so we don't keep a broken file
219
  if os.path.exists(DB_FILE_PATH):
220
  os.unlink(DB_FILE_PATH)
221
  return False
222
  else:
223
+ print("No backup found - starting with an empty/fresh database")
224
 
225
  print("Database restore completed successfully!")
226
  return True
backend/start.sh CHANGED
@@ -9,7 +9,7 @@ log_message() {
9
  }
10
 
11
  validate_environment() {
12
- # Check if we're in the expected container directory structure
13
  if [[ "$(pwd)" != *"/app/backend" ]]; then
14
  log_message "Warning: Unexpected working directory: $(pwd)"
15
  log_message "Expected path to contain: /app/backend"
@@ -30,10 +30,13 @@ validate_environment || {
30
 
31
  log_message "Working from directory: $(pwd)"
32
 
 
 
 
 
33
  SCRIPTS_DIR="$SCRIPT_DIR/scripts"
34
- DATA_DIR="$SCRIPT_DIR/data"
35
- BACKUP_DIR="$SCRIPT_DIR/db_backup"
36
-
37
 
38
  # Validate required environment variables for backup/restore operations
39
  for var in "BACKUP_PASSPHRASE" "HF_TOKEN" "SPACE_ID"; do
@@ -43,7 +46,7 @@ for var in "BACKUP_PASSPHRASE" "HF_TOKEN" "SPACE_ID"; do
43
  fi
44
  done
45
 
46
- # Restore database from backup using absolute path
47
  log_message "Restoring database from backup..."
48
  python "$SCRIPTS_DIR/restore.py"
49
  restore_status=$?
@@ -72,9 +75,9 @@ if test "$WEBUI_SECRET_KEY $WEBUI_JWT_SECRET_KEY" = " "; then
72
  }
73
  fi
74
 
75
- # Handle Ollama configuration if enabled
76
  if [[ "${USE_OLLAMA_DOCKER,,}" == "true" ]]; then
77
- log_message "USE_OLLAMA is set to true, starting ollama serve."
78
  ollama serve &
79
  fi
80
 
@@ -84,7 +87,7 @@ if [[ "${USE_CUDA_DOCKER,,}" == "true" ]]; then
84
  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
85
  fi
86
 
87
- # Handle HuggingFace Space deployment configuration
88
  if [ -n "$SPACE_ID" ]; then
89
  echo "Configuring for HuggingFace Space deployment"
90
  if [ -n "$ADMIN_USER_EMAIL" ] && [ -n "$ADMIN_USER_PASSWORD" ]; then
@@ -113,9 +116,9 @@ WEBUI_SECRET_KEY="$WEBUI_SECRET_KEY" uvicorn open_webui.main:app \
113
  --host "$HOST" --port "$PORT" --forwarded-allow-ips '*' &
114
  WEBUI_PID=$!
115
 
116
- # Configure backup schedule with environment variable defaults
117
- BACKUP_INITIAL_WAIT="${BACKUP_INITIAL_WAIT:-500}" # Default to 500 seconds
118
- BACKUP_INTERVAL="${BACKUP_INTERVAL:-21600}" # Default to 6 hours
119
 
120
  # Start the background backup job
121
  (
@@ -143,5 +146,5 @@ BACKUP_INTERVAL="${BACKUP_INTERVAL:-21600}" # Default to 6 hours
143
  done
144
  ) &
145
 
146
- # Wait for the main web server process to keep container alive
147
  wait $WEBUI_PID
 
9
  }
10
 
11
  validate_environment() {
12
+ # Check if we're in the expected directory structure
13
  if [[ "$(pwd)" != *"/app/backend" ]]; then
14
  log_message "Warning: Unexpected working directory: $(pwd)"
15
  log_message "Expected path to contain: /app/backend"
 
30
 
31
  log_message "Working from directory: $(pwd)"
32
 
33
+ # ---------------------------------------------------------------------------
34
+ # IMPORTANT: We no longer rely on /app/backend/data or /app/backend/db_backup.
35
+ # We define external paths (matching our updated backup/restore scripts):
36
+ # ---------------------------------------------------------------------------
37
  SCRIPTS_DIR="$SCRIPT_DIR/scripts"
38
+ DATA_DIR="/tmp/open_webui/data"
39
+ BACKUP_DIR="/tmp/open_webui/db_backup"
 
40
 
41
  # Validate required environment variables for backup/restore operations
42
  for var in "BACKUP_PASSPHRASE" "HF_TOKEN" "SPACE_ID"; do
 
46
  fi
47
  done
48
 
49
+ # Restore database from backup using absolute path to restore.py
50
  log_message "Restoring database from backup..."
51
  python "$SCRIPTS_DIR/restore.py"
52
  restore_status=$?
 
75
  }
76
  fi
77
 
78
+ # Optional: Start Ollama if USE_OLLAMA_DOCKER is true
79
  if [[ "${USE_OLLAMA_DOCKER,,}" == "true" ]]; then
80
+ log_message "USE_OLLAMA_DOCKER is set to true, starting ollama serve."
81
  ollama serve &
82
  fi
83
 
 
87
  export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/python3.11/site-packages/torch/lib:/usr/local/lib/python3.11/site-packages/nvidia/cudnn/lib"
88
  fi
89
 
90
+ # Handle HuggingFace Space deployment
91
  if [ -n "$SPACE_ID" ]; then
92
  echo "Configuring for HuggingFace Space deployment"
93
  if [ -n "$ADMIN_USER_EMAIL" ] && [ -n "$ADMIN_USER_PASSWORD" ]; then
 
116
  --host "$HOST" --port "$PORT" --forwarded-allow-ips '*' &
117
  WEBUI_PID=$!
118
 
119
+ # Configure backup schedule (set defaults if not provided)
120
+ BACKUP_INITIAL_WAIT="${BACKUP_INITIAL_WAIT:-500}" # 500 seconds
121
+ BACKUP_INTERVAL="${BACKUP_INTERVAL:-21600}" # 6 hours
122
 
123
  # Start the background backup job
124
  (
 
146
  done
147
  ) &
148
 
149
+ # Keep the container alive by waiting on the main web server
150
  wait $WEBUI_PID