thryyyyy committed on
Commit
5c56478
·
1 Parent(s): aa04220

more fixes

Browse files
Files changed (1) hide show
  1. backend/scripts/backup.py +68 -67
backend/scripts/backup.py CHANGED
@@ -5,66 +5,67 @@ import subprocess
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
8
-
9
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
10
 
11
- # ------------------------------------------------------------------------------
12
- # 1. Define directories outside of /app to avoid read-only file system issues
13
- # ------------------------------------------------------------------------------
14
- BACKUP_DIR = "/tmp/open_webui/db_backup"
15
- DATA_DIR = "/tmp/open_webui/data"
 
 
16
 
17
- # Actual files on the local (runtime) filesystem
 
 
 
 
18
  TIMESTAMP_FILE_PATH = os.path.join(BACKUP_DIR, "last_backup_time.txt")
19
  DB_GPG_PATH = os.path.join(BACKUP_DIR, "webui.db.gpg")
20
- DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")
21
 
22
- # Paths to store in the Hugging Face Space repository
23
- # (so your commit will look like "db_backup/webui.db.gpg" etc.)
24
  REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
25
  REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
26
 
 
27
  def ensure_directories():
28
  """
29
- Create and verify all necessary directories for our backup operation
30
- in /tmp/open_webui/ rather than /app.
31
-
32
- Returns:
33
- bool: True if directories exist and are writable
34
  """
35
  try:
36
- for directory in [BACKUP_DIR, DATA_DIR]:
37
- os.makedirs(directory, mode=0o755, exist_ok=True)
38
- dir_stat = os.stat(directory)
39
- print(f"Directory {directory} created or exists with permissions: {oct(dir_stat.st_mode)[-3:]}")
40
-
41
- # Verify we can write to the directory
42
- test_file = os.path.join(directory, '.write_test')
43
- with open(test_file, 'w') as f:
44
- f.write('test')
45
- os.remove(test_file)
46
- print(f"Successfully verified write access to {directory}")
47
-
48
  return True
49
  except Exception as e:
50
- print(f"Error creating/verifying directories: {e}")
51
  return False
52
 
 
53
  def verify_database():
54
  """
55
- Verify the database exists and passes a basic SQLite integrity check.
56
  """
57
  if not os.path.exists(DB_FILE_PATH):
58
  print(f"Database file not found at: {DB_FILE_PATH}")
59
  return False
60
 
61
  try:
62
- # Print file info for debugging
63
  file_stat = os.stat(DB_FILE_PATH)
64
  print(f"Database file size: {file_stat.st_size:,} bytes")
65
  print(f"Database file permissions: {oct(file_stat.st_mode)[-3:]}")
66
 
67
- # Run SQLite integrity check
68
  with sqlite3.connect(DB_FILE_PATH) as conn:
69
  cursor = conn.cursor()
70
  cursor.execute("PRAGMA integrity_check;")
@@ -90,22 +91,20 @@ def verify_database():
90
  print(f"Unexpected error during verification: {e}")
91
  return False
92
 
 
93
  def encrypt_database(passphrase):
94
  """
95
- Encrypt the database file using GPG (AES256), writing the .gpg file
96
- to /tmp/open_webui/db_backup.
97
-
98
- Returns:
99
- bool: True if successful, False otherwise
100
  """
101
  try:
102
  print("\nPreparing for database encryption...")
103
 
104
- # Ensure GPG home directory is created with correct perms
105
  gnupg_dir = '/root/.gnupg'
106
  os.makedirs(gnupg_dir, mode=0o700, exist_ok=True)
107
 
108
- print("\nRunning GPG encryption...")
109
  encrypt_cmd = [
110
  "gpg",
111
  "--batch",
@@ -139,22 +138,22 @@ def encrypt_database(passphrase):
139
  print(f"Encryption failed with exception: {e}")
140
  return False
141
 
 
142
  def get_last_backup_time(repo_id, hf_token):
143
  """
144
- Fetch last backup timestamp from the Hugging Face Space.
145
-
146
- Returns:
147
- datetime or None
148
  """
149
  try:
150
  api = HfApi()
151
- files = api.list_repo_files(repo_id=repo_id, repo_type="space", token=hf_token)
152
-
 
 
 
153
  if REPO_TIMESTAMP_FILE not in files:
154
  print("No timestamp file found in repository")
155
  return None
156
 
157
- # Download the timestamp file
158
  temp_file = hf_hub_download(
159
  repo_id=repo_id,
160
  repo_type="space",
@@ -169,9 +168,10 @@ def get_last_backup_time(repo_id, hf_token):
169
  print(f"Error getting last backup time: {e}")
170
  return None
171
 
 
172
  def save_timestamp_locally():
173
  """
174
- Save the current UTC time as our local timestamp.
175
  """
176
  try:
177
  now = datetime.datetime.now(datetime.timezone.utc)
@@ -187,70 +187,70 @@ def save_timestamp_locally():
187
  print(f"Error saving timestamp: {e}")
188
  return False
189
 
 
190
  def backup_db():
191
  """
192
- Main entry point for performing a backup:
193
- 1. Ensure directories are writable
194
- 2. Check if backup is needed (threshold)
195
- 3. Verify DB
196
- 4. Encrypt DB
197
- 5. Save new timestamp
198
- 6. Upload to HF Space
 
199
  """
200
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
201
  hf_token = os.environ.get("HF_TOKEN")
202
  space_id = os.environ.get("SPACE_ID")
203
 
204
  if not all([passphrase, hf_token, space_id]):
205
- print("Error: Missing one of BACKUP_PASSPHRASE, HF_TOKEN, SPACE_ID")
206
  return False
207
 
208
  if not ensure_directories():
209
- print("Failed to create or verify directories")
210
  return False
211
 
 
212
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
213
  if threshold_minutes > 0:
214
  last_backup_dt = get_last_backup_time(space_id, hf_token)
215
  if last_backup_dt is not None:
216
  now = datetime.datetime.now(datetime.timezone.utc)
217
- # ensure last_backup_dt has a tz
218
  if not last_backup_dt.tzinfo:
219
  last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
220
  elapsed = now - last_backup_dt
221
  if elapsed.total_seconds() < threshold_minutes * 60:
222
  print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago")
223
  print(f"Threshold is {threshold_minutes} minutes")
224
- print("Skipping backup to avoid rebuilding too often")
225
  return True
226
  else:
227
- print("Backup threshold check disabled for testing")
228
 
229
- # Verify the local database
230
  if not verify_database():
231
- print("Database verification failed, aborting backup")
232
  return False
233
 
234
- # Encrypt the database
235
  if not encrypt_database(passphrase):
236
- print("Database encryption failed, aborting backup")
237
  return False
238
 
239
- # Save timestamp to local file
240
  if not save_timestamp_locally():
241
- print("Failed to save timestamp locally, but continuing to upload anyway...")
242
 
243
  # Upload to Hugging Face
244
  print("\nUploading to Hugging Face Spaces...")
245
  try:
246
  api = HfApi()
247
  operations = [
248
- # Add the .gpg file
249
  CommitOperationAdd(
250
  path_in_repo=REPO_DB_GPG_FILE,
251
  path_or_fileobj=DB_GPG_PATH
252
  ),
253
- # Add the timestamp file
254
  CommitOperationAdd(
255
  path_in_repo=REPO_TIMESTAMP_FILE,
256
  path_or_fileobj=TIMESTAMP_FILE_PATH
@@ -267,9 +267,10 @@ def backup_db():
267
  print("Backup files uploaded successfully!")
268
  return True
269
  except Exception as e:
270
- print(f"Error uploading to Hugging Face: {e}")
271
  return False
272
 
 
273
  if __name__ == "__main__":
274
  success = backup_db()
275
  sys.exit(0 if success else 1)
 
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
 
8
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
9
 
10
+ ###############################################################################
11
+ # 1) Determine where the *actual* database file lives
12
+ # - We read DATA_DIR from the environment, defaulting to "/app/backend/data"
13
+ # - Then the DB is "webui.db" in that directory.
14
+ ###############################################################################
15
+ DATA_DIR = os.environ.get("DATA_DIR", "/app/backend/data")
16
+ DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")
17
 
18
+ ###############################################################################
19
+ # 2) Choose a writable directory for our backup artifacts (encrypted .gpg, etc.)
20
+ # By default, "/tmp" is writable on Hugging Face Spaces.
21
+ ###############################################################################
22
+ BACKUP_DIR = "/tmp/open_webui/db_backup"
23
  TIMESTAMP_FILE_PATH = os.path.join(BACKUP_DIR, "last_backup_time.txt")
24
  DB_GPG_PATH = os.path.join(BACKUP_DIR, "webui.db.gpg")
 
25
 
26
+ # Paths in the Hugging Face repo
 
27
  REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
28
  REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
29
 
30
+
31
def ensure_directories():
    """
    Create and verify the /tmp/open_webui/db_backup directory.
    We only need to ensure the backup directory is writable, because
    the DB itself is at DATA_DIR, which might be read-only or read-write
    depending on your environment.
    """
    try:
        # Create the backup directory (no-op if it already exists),
        # then report its effective permission bits for debugging.
        os.makedirs(BACKUP_DIR, mode=0o755, exist_ok=True)
        mode_bits = oct(os.stat(BACKUP_DIR).st_mode)[-3:]
        print(f"Backup directory {BACKUP_DIR} created or exists with permissions: {mode_bits}")

        # Prove writability with a throwaway marker file rather than
        # trusting the reported mode bits alone.
        probe_path = os.path.join(BACKUP_DIR, '.write_test')
        with open(probe_path, 'w') as probe:
            probe.write('test')
        os.remove(probe_path)
        print(f"Successfully verified write access to {BACKUP_DIR}")

        return True
    except Exception as e:
        # Any OS-level failure (permissions, read-only FS, etc.) lands here;
        # callers only need a boolean go/no-go signal.
        print(f"Error creating/verifying backup directory: {e}")
        return False
54
 
55
+
56
  def verify_database():
57
  """
58
+ Ensure the actual database file exists and passes a basic SQLite integrity check.
59
  """
60
  if not os.path.exists(DB_FILE_PATH):
61
  print(f"Database file not found at: {DB_FILE_PATH}")
62
  return False
63
 
64
  try:
 
65
  file_stat = os.stat(DB_FILE_PATH)
66
  print(f"Database file size: {file_stat.st_size:,} bytes")
67
  print(f"Database file permissions: {oct(file_stat.st_mode)[-3:]}")
68
 
 
69
  with sqlite3.connect(DB_FILE_PATH) as conn:
70
  cursor = conn.cursor()
71
  cursor.execute("PRAGMA integrity_check;")
 
91
  print(f"Unexpected error during verification: {e}")
92
  return False
93
 
94
+
95
  def encrypt_database(passphrase):
96
  """
97
+ Encrypt the real DB file (DB_FILE_PATH) using GPG, outputting
98
+ the .gpg file to /tmp/open_webui/db_backup.
 
 
 
99
  """
100
  try:
101
  print("\nPreparing for database encryption...")
102
 
103
+ # Ensure a GPG home directory for key storage
104
  gnupg_dir = '/root/.gnupg'
105
  os.makedirs(gnupg_dir, mode=0o700, exist_ok=True)
106
 
107
+ print("Running GPG encryption...")
108
  encrypt_cmd = [
109
  "gpg",
110
  "--batch",
 
138
  print(f"Encryption failed with exception: {e}")
139
  return False
140
 
141
+
142
  def get_last_backup_time(repo_id, hf_token):
143
  """
144
+ Fetch last backup timestamp from the Hugging Face Space (if present).
 
 
 
145
  """
146
  try:
147
  api = HfApi()
148
+ files = api.list_repo_files(
149
+ repo_id=repo_id,
150
+ repo_type="space",
151
+ token=hf_token
152
+ )
153
  if REPO_TIMESTAMP_FILE not in files:
154
  print("No timestamp file found in repository")
155
  return None
156
 
 
157
  temp_file = hf_hub_download(
158
  repo_id=repo_id,
159
  repo_type="space",
 
168
  print(f"Error getting last backup time: {e}")
169
  return None
170
 
171
+
172
  def save_timestamp_locally():
173
  """
174
+ Save the current UTC time to /tmp/open_webui/db_backup/last_backup_time.txt.
175
  """
176
  try:
177
  now = datetime.datetime.now(datetime.timezone.utc)
 
187
  print(f"Error saving timestamp: {e}")
188
  return False
189
 
190
+
191
  def backup_db():
192
  """
193
+ Main backup entry point:
194
+ 1. Validate env variables
195
+ 2. Ensure /tmp/open_webui/db_backup is writable
196
+ 3. Check threshold to skip if recently backed up
197
+ 4. Verify DB
198
+ 5. Encrypt DB
199
+ 6. Save local timestamp
200
+ 7. Upload .gpg + timestamp to Hugging Face
201
  """
202
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
203
  hf_token = os.environ.get("HF_TOKEN")
204
  space_id = os.environ.get("SPACE_ID")
205
 
206
  if not all([passphrase, hf_token, space_id]):
207
+ print("Error: Missing required environment variables for backup (BACKUP_PASSPHRASE, HF_TOKEN, SPACE_ID).")
208
  return False
209
 
210
  if not ensure_directories():
211
+ print("Failed to create or verify backup directories.")
212
  return False
213
 
214
+ # Check threshold
215
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
216
  if threshold_minutes > 0:
217
  last_backup_dt = get_last_backup_time(space_id, hf_token)
218
  if last_backup_dt is not None:
219
  now = datetime.datetime.now(datetime.timezone.utc)
 
220
  if not last_backup_dt.tzinfo:
221
  last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
222
  elapsed = now - last_backup_dt
223
  if elapsed.total_seconds() < threshold_minutes * 60:
224
  print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago")
225
  print(f"Threshold is {threshold_minutes} minutes")
226
+ print("Skipping backup to avoid frequent rebuilds")
227
  return True
228
  else:
229
+ print("Backup threshold check disabled (BACKUP_THRESHOLD_MINUTES=0).")
230
 
231
+ # Verify local DB
232
  if not verify_database():
233
+ print("Database verification failed, aborting backup.")
234
  return False
235
 
236
+ # Encrypt the DB
237
  if not encrypt_database(passphrase):
238
+ print("Database encryption failed, aborting backup.")
239
  return False
240
 
241
+ # Save local timestamp
242
  if not save_timestamp_locally():
243
+ print("Warning: Failed to save timestamp locally, but continuing to upload.")
244
 
245
  # Upload to Hugging Face
246
  print("\nUploading to Hugging Face Spaces...")
247
  try:
248
  api = HfApi()
249
  operations = [
 
250
  CommitOperationAdd(
251
  path_in_repo=REPO_DB_GPG_FILE,
252
  path_or_fileobj=DB_GPG_PATH
253
  ),
 
254
  CommitOperationAdd(
255
  path_in_repo=REPO_TIMESTAMP_FILE,
256
  path_or_fileobj=TIMESTAMP_FILE_PATH
 
267
  print("Backup files uploaded successfully!")
268
  return True
269
  except Exception as e:
270
+ print(f"Error uploading backup to HuggingFace: {e}")
271
  return False
272
 
273
+
274
  if __name__ == "__main__":
275
  success = backup_db()
276
  sys.exit(0 if success else 1)