thryyyyy committed on
Commit
a1f4f3a
·
1 Parent(s): a8c42ef

more fixes

Browse files
Files changed (1) hide show
  1. backend/scripts/backup.py +152 -64
backend/scripts/backup.py CHANGED
@@ -8,38 +8,55 @@ from pathlib import Path
8
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
9
 
10
  # Get the absolute paths for our script and base directories
11
- # We use absolute paths to ensure reliability regardless of the working directory
12
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
13
  BASE_DIR = os.path.dirname(SCRIPT_DIR)
14
 
15
  # Define all paths as absolute paths relative to our base directory
16
- # This prevents issues when the working directory changes during execution
17
  TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
18
  DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
19
  DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
20
 
21
  def ensure_directories():
22
  """
23
- Create all necessary directories for our backup operation.
24
- This function runs before any file operations to ensure we have proper permissions
25
- and directory structure in place.
26
 
27
  Returns:
28
- bool: True if directories were created successfully, False otherwise
29
  """
30
  try:
31
- # Create both backup and database directories with appropriate permissions
32
  for directory in [os.path.dirname(DB_GPG_PATH), os.path.dirname(DB_FILE_PATH)]:
 
33
  os.makedirs(directory, mode=0o755, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  return True
35
  except Exception as e:
36
- print(f"Error creating directories: {e}")
 
 
 
37
  return False
38
 
39
  def get_last_backup_time(repo_id, hf_token):
40
  """
41
  Retrieve the timestamp of the last backup from HuggingFace Space.
42
- This helps us implement the backup threshold logic to prevent too frequent backups.
43
 
44
  Args:
45
  repo_id (str): The HuggingFace Space ID
@@ -50,7 +67,7 @@ def get_last_backup_time(repo_id, hf_token):
50
  """
51
  api = HfApi()
52
  try:
53
- # First check if the timestamp file exists in the repository
54
  files = api.list_repo_files(
55
  repo_id=repo_id,
56
  repo_type="space",
@@ -62,7 +79,7 @@ def get_last_backup_time(repo_id, hf_token):
62
  print(f"No timestamp file found in repository")
63
  return None
64
 
65
- # Download and parse the timestamp file if it exists
66
  temp_file = hf_hub_download(
67
  repo_id=repo_id,
68
  repo_type="space",
@@ -81,17 +98,24 @@ def get_last_backup_time(repo_id, hf_token):
81
  def save_timestamp_locally():
82
  """
83
  Save the current UTC time as our backup timestamp.
84
- This timestamp is used to track when backups occur and implement threshold logic.
85
 
86
  Returns:
87
- bool: True if timestamp was saved successfully, False otherwise
88
  """
89
  try:
90
- # Use UTC time with timezone information for consistent timestamps
91
  now = datetime.datetime.now(datetime.timezone.utc)
92
- os.makedirs(os.path.dirname(TIMESTAMP_FILE_PATH), exist_ok=True)
93
- with open(TIMESTAMP_FILE_PATH, "w", encoding="utf-8") as f:
 
 
 
 
 
94
  f.write(now.isoformat())
 
 
95
  return True
96
  except Exception as e:
97
  print(f"Error saving timestamp: {e}")
@@ -99,29 +123,31 @@ def save_timestamp_locally():
99
 
100
  def verify_database():
101
  """
102
- Verify the database exists and has basic integrity.
103
- Uses Python's built-in sqlite3 module to perform comprehensive checks:
104
- 1. Verifies the file exists
105
- 2. Checks database integrity
106
- 3. Validates basic schema structure
107
 
108
  Returns:
109
- bool: True if database is valid and healthy, False otherwise
110
  """
111
  if not os.path.exists(DB_FILE_PATH):
112
  print(f"Database file not found at: {DB_FILE_PATH}")
113
  return False
114
 
115
  try:
116
- # Attempt to connect to the database and perform verification
 
 
 
 
 
117
  with sqlite3.connect(DB_FILE_PATH) as conn:
118
  cursor = conn.cursor()
119
 
120
- # Run SQLite's built-in integrity check
121
  cursor.execute("PRAGMA integrity_check;")
122
  result = cursor.fetchone()[0]
123
 
124
- # Verify the database has a basic schema structure
125
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
126
  tables = cursor.fetchall()
127
 
@@ -138,27 +164,103 @@ def verify_database():
138
  return False
139
 
140
  except sqlite3.Error as e:
141
- print(f"Database verification failed: {e}")
 
 
 
 
142
  return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  except Exception as e:
144
- print(f"Unexpected error during database verification: {e}")
 
145
  return False
146
 
147
  def backup_db():
148
  """
149
- Main function to handle the database backup process.
150
- Orchestrates the entire backup operation including:
151
- 1. Environment validation
152
- 2. Directory creation
153
- 3. Threshold checking
154
- 4. Database verification
155
- 5. Encryption
156
- 6. Upload to HuggingFace
157
 
158
  Returns:
159
- bool: True if backup completed successfully, False otherwise
160
  """
161
- # Validate required environment variables
162
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
163
  hf_token = os.environ.get("HF_TOKEN")
164
  space_id = os.environ.get("SPACE_ID")
@@ -167,14 +269,15 @@ def backup_db():
167
  print("Error: Missing required environment variables")
168
  return False
169
 
170
- # Ensure our directory structure exists
171
  if not ensure_directories():
 
172
  return False
173
 
174
- # Get threshold from environment variable, defaulting to 2 hours
175
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
176
 
177
- # Check if we need to perform a backup based on the threshold
178
  if threshold_minutes > 0:
179
  last_backup_dt = get_last_backup_time(space_id, hf_token)
180
  if last_backup_dt is not None:
@@ -190,42 +293,27 @@ def backup_db():
190
  else:
191
  print("Backup threshold check disabled for testing")
192
 
193
- # Verify database integrity before proceeding
194
  if not verify_database():
195
  print("Database verification failed, aborting backup")
196
  return False
197
 
198
- # Proceed with backup encryption
199
- print("Encrypting database with GPG...")
200
- encrypt_cmd = [
201
- "gpg",
202
- "--batch",
203
- "--yes",
204
- "--passphrase", passphrase,
205
- "-c",
206
- "--cipher-algo", "AES256",
207
- "-o", DB_GPG_PATH,
208
- DB_FILE_PATH
209
- ]
210
-
211
- try:
212
- subprocess.run(encrypt_cmd, check=True)
213
- except subprocess.CalledProcessError as e:
214
- print(f"Encryption failed: {e}")
215
  return False
216
 
217
- print(f"Database encrypted successfully to {DB_GPG_PATH}")
218
-
219
  # Save the backup timestamp
220
  if not save_timestamp_locally():
 
221
  return False
222
 
223
  # Upload to HuggingFace Spaces
224
- print("Uploading to Hugging Face Spaces...")
225
  api = HfApi()
226
 
227
  try:
228
- # Prepare file operations for both backup and timestamp
229
  operations = [
230
  CommitOperationAdd(
231
  path_in_repo=os.path.relpath(DB_GPG_PATH, BASE_DIR),
@@ -237,7 +325,7 @@ def backup_db():
237
  )
238
  ]
239
 
240
- # Commit both files to the repository
241
  api.create_commit(
242
  repo_id=space_id,
243
  repo_type="space",
@@ -245,7 +333,7 @@ def backup_db():
245
  commit_message="Update encrypted database backup + timestamp",
246
  token=hf_token
247
  )
248
- print("DB backup + timestamp uploaded successfully!")
249
  return True
250
 
251
  except Exception as e:
 
8
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
9
 
10
  # Get the absolute paths for our script and base directories
 
11
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
12
  BASE_DIR = os.path.dirname(SCRIPT_DIR)
13
 
14
  # Define all paths as absolute paths relative to our base directory
 
15
  TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
16
  DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
17
  DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
18
 
19
  def ensure_directories():
20
  """
21
+ Create and verify all necessary directories for our backup operation.
22
+ Includes thorough permission checking and write verification.
 
23
 
24
  Returns:
25
+ bool: True if directories were created and verified successfully
26
  """
27
  try:
 
28
  for directory in [os.path.dirname(DB_GPG_PATH), os.path.dirname(DB_FILE_PATH)]:
29
+ # Create directory with full permissions for the current user
30
  os.makedirs(directory, mode=0o755, exist_ok=True)
31
+
32
+ # Verify directory exists and get its current permissions
33
+ dir_stat = os.stat(directory)
34
+ print(f"Directory {directory} created with permissions: {oct(dir_stat.st_mode)[-3:]}")
35
+
36
+ # Verify we can write to the directory with a test file
37
+ test_file = os.path.join(directory, '.write_test')
38
+ try:
39
+ with open(test_file, 'w') as f:
40
+ f.write('test')
41
+ os.remove(test_file)
42
+ print(f"Successfully verified write access to {directory}")
43
+ except Exception as e:
44
+ print(f"Directory {directory} is not writable: {e}")
45
+ print(f"Current process user ID: {os.getuid()}")
46
+ print(f"Directory owner ID: {dir_stat.st_uid}")
47
+ return False
48
+
49
  return True
50
  except Exception as e:
51
+ print(f"Error in directory creation/verification: {e}")
52
+ print(f"Current process user ID: {os.getuid()}")
53
+ print(f"Current process group ID: {os.getgid()}")
54
+ print(f"Current working directory: {os.getcwd()}")
55
  return False
56
 
57
  def get_last_backup_time(repo_id, hf_token):
58
  """
59
  Retrieve the timestamp of the last backup from HuggingFace Space.
 
60
 
61
  Args:
62
  repo_id (str): The HuggingFace Space ID
 
67
  """
68
  api = HfApi()
69
  try:
70
+ # Check if the timestamp file exists in the repository
71
  files = api.list_repo_files(
72
  repo_id=repo_id,
73
  repo_type="space",
 
79
  print(f"No timestamp file found in repository")
80
  return None
81
 
82
+ # Download and parse the timestamp file
83
  temp_file = hf_hub_download(
84
  repo_id=repo_id,
85
  repo_type="space",
 
98
  def save_timestamp_locally():
99
  """
100
  Save the current UTC time as our backup timestamp.
101
+ Includes error handling and directory creation.
102
 
103
  Returns:
104
+ bool: True if timestamp was saved successfully
105
  """
106
  try:
107
+ # Use UTC time with timezone information
108
  now = datetime.datetime.now(datetime.timezone.utc)
109
+
110
+ # Ensure the directory exists
111
+ os.makedirs(os.path.dirname(TIMESTAMP_FILE_PATH), mode=0o755, exist_ok=True)
112
+
113
+ # Write the timestamp atomically using a temporary file
114
+ temp_path = f"{TIMESTAMP_FILE_PATH}.tmp"
115
+ with open(temp_path, "w", encoding="utf-8") as f:
116
  f.write(now.isoformat())
117
+ os.replace(temp_path, TIMESTAMP_FILE_PATH)
118
+
119
  return True
120
  except Exception as e:
121
  print(f"Error saving timestamp: {e}")
 
123
 
124
  def verify_database():
125
  """
126
+ Comprehensive verification of the database's existence and integrity.
127
+ Uses SQLite's built-in integrity check and schema validation.
 
 
 
128
 
129
  Returns:
130
+ bool: True if database is valid and healthy
131
  """
132
  if not os.path.exists(DB_FILE_PATH):
133
  print(f"Database file not found at: {DB_FILE_PATH}")
134
  return False
135
 
136
  try:
137
+ # Get file information for debugging
138
+ file_stat = os.stat(DB_FILE_PATH)
139
+ print(f"Database file size: {file_stat.st_size:,} bytes")
140
+ print(f"Database file permissions: {oct(file_stat.st_mode)[-3:]}")
141
+
142
+ # Verify database integrity
143
  with sqlite3.connect(DB_FILE_PATH) as conn:
144
  cursor = conn.cursor()
145
 
146
+ # Run comprehensive integrity check
147
  cursor.execute("PRAGMA integrity_check;")
148
  result = cursor.fetchone()[0]
149
 
150
+ # Verify schema structure
151
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
152
  tables = cursor.fetchall()
153
 
 
164
  return False
165
 
166
  except sqlite3.Error as e:
167
+ print(f"SQLite error during verification: {e}")
168
+ return False
169
+ except Exception as e:
170
+ print(f"Unexpected error during verification: {e}")
171
+ print(f"Error type: {type(e).__name__}")
172
  return False
173
+
174
def encrypt_database(passphrase):
    """
    Encrypt the database file with GPG (symmetric, AES256).

    The ciphertext is first written to a staging file next to DB_GPG_PATH
    and only moved over DB_GPG_PATH once GPG has exited successfully and
    produced a non-empty file. A failed run therefore never destroys the
    previous encrypted backup.

    Args:
        passphrase (str): The encryption passphrase

    Returns:
        bool: True if encryption was successful
    """
    if not passphrase:
        print("Encryption passphrase is empty, cannot encrypt")
        return False

    # Staging file lives in the target directory so os.replace() stays on
    # one filesystem and is atomic.
    temp_path = f"{DB_GPG_PATH}.tmp"

    try:
        print("\nPreparing for database encryption...")

        # Verify source file
        if not os.path.exists(DB_FILE_PATH):
            print(f"Source database file not found: {DB_FILE_PATH}")
            return False

        source_size = os.path.getsize(DB_FILE_PATH)
        print(f"Source file exists and is {source_size:,} bytes")

        # Verify target directory
        target_dir = os.path.dirname(DB_GPG_PATH)
        if not os.path.isdir(target_dir):
            print(f"Target directory doesn't exist: {target_dir}")
            return False

        writable = "is" if os.access(target_dir, os.W_OK) else "is not"
        print(f"Target directory exists and {writable} writable")

        # Hand the passphrase to GPG on stdin rather than on the command
        # line, where any local user could read it from the process list.
        # GPG only reads the first line from the descriptor, so a
        # passphrase containing a line break falls back to the argument
        # form to keep the derived key identical to earlier backups.
        if "\n" in passphrase or "\r" in passphrase:
            passphrase_args = ["--passphrase", passphrase]
            stdin_data = None
        else:
            passphrase_args = ["--passphrase-fd", "0"]
            stdin_data = passphrase + "\n"

        # Prepare GPG command (argument list, no shell involved)
        encrypt_cmd = [
            "gpg",
            "--batch",
            "--yes",
            *passphrase_args,
            "-c",
            "--cipher-algo", "AES256",
            "-o", temp_path,
            DB_FILE_PATH,
        ]

        # Run GPG with output capturing; the return code is checked by hand
        # so stdout/stderr can be reported on failure.
        print("\nRunning GPG encryption...")
        result = subprocess.run(
            encrypt_cmd,
            input=stdin_data,
            capture_output=True,
            text=True,
            check=False,
        )

        if result.returncode != 0:
            print(f"GPG encryption failed with code {result.returncode}")
            if result.stdout:
                print(f"GPG stdout: {result.stdout}")
            if result.stderr:
                print(f"GPG stderr: {result.stderr}")
            return False

        # Verify the encrypted file was created and is not empty
        if not os.path.exists(temp_path):
            print("GPG reported success but encrypted file not found")
            return False

        encrypted_size = os.path.getsize(temp_path)
        if encrypted_size == 0:
            print("GPG reported success but encrypted file is empty")
            return False

        # Publish the new backup; the old one is replaced only now.
        os.replace(temp_path, DB_GPG_PATH)
        print(f"Encryption successful. Encrypted file size: {encrypted_size:,} bytes")
        return True

    except Exception as e:
        print(f"Encryption failed with exception: {e}")
        print(f"Exception type: {type(e).__name__}")
        return False
    finally:
        # After a successful os.replace() the staging file no longer
        # exists; on every failure path remove whatever GPG left behind.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError as cleanup_error:
                print(f"Warning: Could not remove temporary file {temp_path}: {cleanup_error}")
254
 
255
  def backup_db():
256
  """
257
+ Main function to handle the complete database backup process.
258
+ Includes environment validation, threshold checking, encryption, and upload.
 
 
 
 
 
 
259
 
260
  Returns:
261
+ bool: True if backup completed successfully
262
  """
263
+ # Validate environment variables
264
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
265
  hf_token = os.environ.get("HF_TOKEN")
266
  space_id = os.environ.get("SPACE_ID")
 
269
  print("Error: Missing required environment variables")
270
  return False
271
 
272
+ # Ensure directory structure exists
273
  if not ensure_directories():
274
+ print("Failed to create or verify directories")
275
  return False
276
 
277
+ # Get backup threshold setting
278
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
279
 
280
+ # Check if backup is needed based on threshold
281
  if threshold_minutes > 0:
282
  last_backup_dt = get_last_backup_time(space_id, hf_token)
283
  if last_backup_dt is not None:
 
293
  else:
294
  print("Backup threshold check disabled for testing")
295
 
296
+ # Verify database integrity
297
  if not verify_database():
298
  print("Database verification failed, aborting backup")
299
  return False
300
 
301
+ # Encrypt the database
302
+ if not encrypt_database(passphrase):
303
+ print("Database encryption failed, aborting backup")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  return False
305
 
 
 
306
  # Save the backup timestamp
307
  if not save_timestamp_locally():
308
+ print("Failed to save timestamp")
309
  return False
310
 
311
  # Upload to HuggingFace Spaces
312
+ print("\nUploading to Hugging Face Spaces...")
313
  api = HfApi()
314
 
315
  try:
316
+ # Prepare file operations
317
  operations = [
318
  CommitOperationAdd(
319
  path_in_repo=os.path.relpath(DB_GPG_PATH, BASE_DIR),
 
325
  )
326
  ]
327
 
328
+ # Create commit with both files
329
  api.create_commit(
330
  repo_id=space_id,
331
  repo_type="space",
 
333
  commit_message="Update encrypted database backup + timestamp",
334
  token=hf_token
335
  )
336
+ print("Backup files uploaded successfully!")
337
  return True
338
 
339
  except Exception as e: