thryyyyy committed on
Commit
77f7573
·
1 Parent(s): 47ac644

more fixes

Browse files
Dockerfile CHANGED
@@ -133,7 +133,7 @@ RUN if [ "$USE_OLLAMA" = "true" ]; then \
133
  # install python dependencies
134
  COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
135
 
136
- RUN pip3 install huggingface_hub
137
  RUN pip3 install uv && \
138
  if [ "$USE_CUDA" = "true" ]; then \
139
  # If you use CUDA the whisper and embedding model will be downloaded on first use
 
133
  # install python dependencies
134
  COPY --chown=$UID:$GID ./backend/requirements.txt ./requirements.txt
135
 
136
+ RUN pip3 install huggingface_hub
137
  RUN pip3 install uv && \
138
  if [ "$USE_CUDA" = "true" ]; then \
139
  # If you use CUDA the whisper and embedding model will be downloaded on first use
backend/requirements.txt CHANGED
@@ -107,3 +107,4 @@ ldap3==2.9.1
107
 
108
  gnupg
109
  huggingface_hub
 
 
107
 
108
  gnupg
109
  huggingface_hub
110
+ # sqlite3 ships with the Python standard library; it is not pip-installable
backend/scripts/backup.py CHANGED
@@ -3,12 +3,17 @@ import os
3
  import sys
4
  import subprocess
5
  import datetime
 
6
  from pathlib import Path
7
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
8
 
 
 
9
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
10
  BASE_DIR = os.path.dirname(SCRIPT_DIR)
11
 
 
 
12
  TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
13
  DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
14
  DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
@@ -16,10 +21,14 @@ DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
16
  def ensure_directories():
17
  """
18
  Create all necessary directories for our backup operation.
19
- Using absolute paths and proper permissions ensures consistent behavior.
 
 
 
 
20
  """
21
  try:
22
- # Create directories for both backup and database files
23
  for directory in [os.path.dirname(DB_GPG_PATH), os.path.dirname(DB_FILE_PATH)]:
24
  os.makedirs(directory, mode=0o755, exist_ok=True)
25
  return True
@@ -30,7 +39,7 @@ def ensure_directories():
30
  def get_last_backup_time(repo_id, hf_token):
31
  """
32
  Retrieve the timestamp of the last backup from HuggingFace Space.
33
- This helps us determine if we need to perform a new backup.
34
 
35
  Args:
36
  repo_id (str): The HuggingFace Space ID
@@ -41,7 +50,7 @@ def get_last_backup_time(repo_id, hf_token):
41
  """
42
  api = HfApi()
43
  try:
44
- # First check if the file exists in the repository
45
  files = api.list_repo_files(
46
  repo_id=repo_id,
47
  repo_type="space",
@@ -53,7 +62,7 @@ def get_last_backup_time(repo_id, hf_token):
53
  print(f"No timestamp file found in repository")
54
  return None
55
 
56
- # Download and parse the timestamp file
57
  temp_file = hf_hub_download(
58
  repo_id=repo_id,
59
  repo_type="space",
@@ -72,9 +81,13 @@ def get_last_backup_time(repo_id, hf_token):
72
  def save_timestamp_locally():
73
  """
74
  Save the current UTC time as our backup timestamp.
75
- Creates the backup directory if it doesn't exist.
 
 
 
76
  """
77
  try:
 
78
  now = datetime.datetime.now(datetime.timezone.utc)
79
  os.makedirs(os.path.dirname(TIMESTAMP_FILE_PATH), exist_ok=True)
80
  with open(TIMESTAMP_FILE_PATH, "w", encoding="utf-8") as f:
@@ -87,26 +100,65 @@ def save_timestamp_locally():
87
  def verify_database():
88
  """
89
  Verify the database exists and has basic integrity.
90
- This prevents us from backing up corrupted data.
 
 
 
 
 
 
91
  """
92
  if not os.path.exists(DB_FILE_PATH):
93
  print(f"Database file not found at: {DB_FILE_PATH}")
94
  return False
95
 
96
  try:
97
- verify_cmd = ["sqlite3", DB_FILE_PATH, "PRAGMA integrity_check;"]
98
- result = subprocess.run(verify_cmd, capture_output=True, text=True, check=True)
99
- return "ok" in result.stdout.lower()
100
- except subprocess.CalledProcessError as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  print(f"Database verification failed: {e}")
102
  return False
 
 
 
103
 
104
  def backup_db():
105
  """
106
  Main function to handle the database backup process.
107
- Includes verification, encryption, and upload to HuggingFace.
 
 
 
 
 
 
 
 
 
108
  """
109
- # Ensure we have all required environment variables
110
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
111
  hf_token = os.environ.get("HF_TOKEN")
112
  space_id = os.environ.get("SPACE_ID")
@@ -115,14 +167,14 @@ def backup_db():
115
  print("Error: Missing required environment variables")
116
  return False
117
 
118
- # Create necessary directories first
119
  if not ensure_directories():
120
  return False
121
 
122
  # Get threshold from environment variable, defaulting to 2 hours
123
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
124
 
125
- # Check if backup is needed based on threshold
126
  if threshold_minutes > 0:
127
  last_backup_dt = get_last_backup_time(space_id, hf_token)
128
  if last_backup_dt is not None:
@@ -131,19 +183,19 @@ def backup_db():
131
  last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
132
  elapsed = now - last_backup_dt
133
  if elapsed.total_seconds() < threshold_minutes * 60:
134
- print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago.")
135
- print(f"Threshold is {threshold_minutes} minutes.")
136
- print("Skipping backup to avoid rebuild loop.")
137
  return True
138
  else:
139
  print("Backup threshold check disabled for testing")
140
 
141
- # Verify database before backing up
142
  if not verify_database():
143
  print("Database verification failed, aborting backup")
144
  return False
145
 
146
- # Proceed with backup...
147
  print("Encrypting database with GPG...")
148
  encrypt_cmd = [
149
  "gpg",
@@ -164,16 +216,16 @@ def backup_db():
164
 
165
  print(f"Database encrypted successfully to {DB_GPG_PATH}")
166
 
167
- # Update the timestamp file locally
168
  if not save_timestamp_locally():
169
  return False
170
 
171
- # Upload to Hugging Face Spaces
172
  print("Uploading to Hugging Face Spaces...")
173
  api = HfApi()
174
 
175
  try:
176
- # Create operations list for both files
177
  operations = [
178
  CommitOperationAdd(
179
  path_in_repo=os.path.relpath(DB_GPG_PATH, BASE_DIR),
@@ -185,6 +237,7 @@ def backup_db():
185
  )
186
  ]
187
 
 
188
  api.create_commit(
189
  repo_id=space_id,
190
  repo_type="space",
 
3
  import sys
4
  import subprocess
5
  import datetime
6
+ import sqlite3
7
  from pathlib import Path
8
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
9
 
10
+ # Get the absolute paths for our script and base directories
11
+ # We use absolute paths to ensure reliability regardless of the working directory
12
  SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
13
  BASE_DIR = os.path.dirname(SCRIPT_DIR)
14
 
15
+ # Define all paths as absolute paths relative to our base directory
16
+ # This prevents issues when the working directory changes during execution
17
  TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
18
  DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
19
  DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
 
21
  def ensure_directories():
22
  """
23
  Create all necessary directories for our backup operation.
24
+ This function runs before any file operations to ensure we have proper permissions
25
+ and directory structure in place.
26
+
27
+ Returns:
28
+ bool: True if directories were created successfully, False otherwise
29
  """
30
  try:
31
+ # Create both backup and database directories with appropriate permissions
32
  for directory in [os.path.dirname(DB_GPG_PATH), os.path.dirname(DB_FILE_PATH)]:
33
  os.makedirs(directory, mode=0o755, exist_ok=True)
34
  return True
 
39
  def get_last_backup_time(repo_id, hf_token):
40
  """
41
  Retrieve the timestamp of the last backup from HuggingFace Space.
42
+ This helps us implement the backup threshold logic to prevent too frequent backups.
43
 
44
  Args:
45
  repo_id (str): The HuggingFace Space ID
 
50
  """
51
  api = HfApi()
52
  try:
53
+ # First check if the timestamp file exists in the repository
54
  files = api.list_repo_files(
55
  repo_id=repo_id,
56
  repo_type="space",
 
62
  print(f"No timestamp file found in repository")
63
  return None
64
 
65
+ # Download and parse the timestamp file if it exists
66
  temp_file = hf_hub_download(
67
  repo_id=repo_id,
68
  repo_type="space",
 
81
  def save_timestamp_locally():
82
  """
83
  Save the current UTC time as our backup timestamp.
84
+ This timestamp is used to track when backups occur and implement threshold logic.
85
+
86
+ Returns:
87
+ bool: True if timestamp was saved successfully, False otherwise
88
  """
89
  try:
90
+ # Use UTC time with timezone information for consistent timestamps
91
  now = datetime.datetime.now(datetime.timezone.utc)
92
  os.makedirs(os.path.dirname(TIMESTAMP_FILE_PATH), exist_ok=True)
93
  with open(TIMESTAMP_FILE_PATH, "w", encoding="utf-8") as f:
 
100
def verify_database(db_path=None):
    """
    Verify the database exists and has basic integrity.

    Uses Python's built-in sqlite3 module to perform comprehensive checks:
    1. Verifies the file exists
    2. Checks database integrity (PRAGMA integrity_check)
    3. Validates basic schema structure (at least one table present)

    Args:
        db_path (str, optional): Path to the SQLite database file.
            Defaults to the module-level DB_FILE_PATH.

    Returns:
        bool: True if database is valid and healthy, False otherwise
    """
    if db_path is None:
        db_path = DB_FILE_PATH

    if not os.path.exists(db_path):
        print(f"Database file not found at: {db_path}")
        return False

    conn = None
    try:
        # NOTE: sqlite3's `with` context manager only manages transactions,
        # not the connection lifetime, so we close explicitly in `finally`
        # to avoid leaking the connection (and its file handle).
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Run SQLite's built-in integrity check
        cursor.execute("PRAGMA integrity_check;")
        result = cursor.fetchone()[0]

        # Verify the database has a basic schema structure
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        tables = cursor.fetchall()

        if result.lower() == "ok" and len(tables) > 0:
            print("Database integrity verified successfully")
            print(f"Found {len(tables)} tables in database")
            return True

        print("Database integrity check failed")
        if result.lower() != "ok":
            print(f"Integrity check result: {result}")
        if len(tables) == 0:
            print("No tables found in database")
        return False

    except sqlite3.Error as e:
        print(f"Database verification failed: {e}")
        return False
    except Exception as e:
        print(f"Unexpected error during database verification: {e}")
        return False
    finally:
        if conn is not None:
            conn.close()
146
 
147
  def backup_db():
148
  """
149
  Main function to handle the database backup process.
150
+ Orchestrates the entire backup operation including:
151
+ 1. Environment validation
152
+ 2. Directory creation
153
+ 3. Threshold checking
154
+ 4. Database verification
155
+ 5. Encryption
156
+ 6. Upload to HuggingFace
157
+
158
+ Returns:
159
+ bool: True if backup completed successfully, False otherwise
160
  """
161
+ # Validate required environment variables
162
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
163
  hf_token = os.environ.get("HF_TOKEN")
164
  space_id = os.environ.get("SPACE_ID")
 
167
  print("Error: Missing required environment variables")
168
  return False
169
 
170
+ # Ensure our directory structure exists
171
  if not ensure_directories():
172
  return False
173
 
174
  # Get threshold from environment variable, defaulting to 2 hours
175
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
176
 
177
+ # Check if we need to perform a backup based on the threshold
178
  if threshold_minutes > 0:
179
  last_backup_dt = get_last_backup_time(space_id, hf_token)
180
  if last_backup_dt is not None:
 
183
  last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
184
  elapsed = now - last_backup_dt
185
  if elapsed.total_seconds() < threshold_minutes * 60:
186
+ print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago")
187
+ print(f"Threshold is {threshold_minutes} minutes")
188
+ print("Skipping backup to avoid rebuild loop")
189
  return True
190
  else:
191
  print("Backup threshold check disabled for testing")
192
 
193
+ # Verify database integrity before proceeding
194
  if not verify_database():
195
  print("Database verification failed, aborting backup")
196
  return False
197
 
198
+ # Proceed with backup encryption
199
  print("Encrypting database with GPG...")
200
  encrypt_cmd = [
201
  "gpg",
 
216
 
217
  print(f"Database encrypted successfully to {DB_GPG_PATH}")
218
 
219
+ # Save the backup timestamp
220
  if not save_timestamp_locally():
221
  return False
222
 
223
+ # Upload to HuggingFace Spaces
224
  print("Uploading to Hugging Face Spaces...")
225
  api = HfApi()
226
 
227
  try:
228
+ # Prepare file operations for both backup and timestamp
229
  operations = [
230
  CommitOperationAdd(
231
  path_in_repo=os.path.relpath(DB_GPG_PATH, BASE_DIR),
 
237
  )
238
  ]
239
 
240
+ # Commit both files to the repository
241
  api.create_commit(
242
  repo_id=space_id,
243
  repo_type="space",
backend/scripts/restore.py CHANGED
@@ -3,14 +3,20 @@ import os
3
  import sys
4
  import subprocess
5
  import datetime
 
6
  from pathlib import Path
7
  from huggingface_hub import HfApi, hf_hub_download
8
 
9
- # Keep paths consistent with backup.py for better maintainability
10
- # Using relative paths allows the script to work regardless of where the repository is cloned
11
- TIMESTAMP_FILE_PATH = "db_backup/last_backup_time.txt"
12
- DB_GPG_PATH = "db_backup/webui.db.gpg"
13
- DB_FILE_PATH = "data/webui.db"
 
 
 
 
 
14
 
15
  def check_requirements():
16
  """
@@ -21,6 +27,7 @@ def check_requirements():
21
  bool: True if GPG is available, False otherwise
22
  """
23
  try:
 
24
  subprocess.run(["gpg", "--version"], check=True, capture_output=True)
25
  return True
26
  except (subprocess.CalledProcessError, FileNotFoundError):
@@ -29,8 +36,8 @@ def check_requirements():
29
 
30
  def validate_secrets():
31
  """
32
- Check that all required environment variables are set.
33
- These variables are essential for accessing the backup and decrypting it.
34
 
35
  Returns:
36
  bool: True if all required variables are set, False otherwise
@@ -43,41 +50,17 @@ def validate_secrets():
43
  return False
44
  return True
45
 
46
- def validate_repo_access(repo_id, token):
47
- """
48
- Verify that we can access the HuggingFace repository with the provided credentials.
49
- This early check helps prevent cryptic errors later in the process.
50
-
51
- Args:
52
- repo_id (str): The HuggingFace Space ID
53
- token (str): The HuggingFace API token
54
-
55
- Returns:
56
- bool: True if repository access is confirmed, False otherwise
57
- """
58
- api = HfApi()
59
- try:
60
- api.list_repo_files(
61
- repo_id=repo_id,
62
- repo_type="space",
63
- token=token
64
- )
65
- return True
66
- except Exception as e:
67
- print(f"Error validating repository access: {e}")
68
- print(f"Please verify your SPACE_ID ({repo_id}) and HF_TOKEN are correct")
69
- return False
70
-
71
  def ensure_directories():
72
  """
73
- Create the necessary directory structure for database and backup files.
74
- This ensures we have proper permissions and all required directories exist.
75
 
76
  Returns:
77
  bool: True if directories are created successfully, False otherwise
78
  """
79
  try:
80
- for directory in ["data", "db_backup"]:
 
81
  os.makedirs(directory, mode=0o755, exist_ok=True)
82
  return True
83
  except Exception as e:
@@ -87,7 +70,6 @@ def ensure_directories():
87
  def get_latest_backup_info(repo_id, hf_token):
88
  """
89
  Check for and retrieve information about the latest backup from HuggingFace.
90
- This function both verifies backup existence and gets its timestamp.
91
 
92
  Args:
93
  repo_id (str): The HuggingFace Space ID
@@ -98,14 +80,16 @@ def get_latest_backup_info(repo_id, hf_token):
98
  """
99
  api = HfApi()
100
  try:
101
- # First check if the backup file exists in the repository
102
  files = api.list_repo_files(
103
  repo_id=repo_id,
104
  repo_type="space",
105
  token=hf_token
106
  )
107
 
108
- backup_exists = DB_GPG_PATH in files
 
 
109
 
110
  if not backup_exists:
111
  print("No backup file found in the repository")
@@ -113,10 +97,11 @@ def get_latest_backup_info(repo_id, hf_token):
113
 
114
  # If backup exists, try to get its timestamp
115
  try:
 
116
  timestamp_file = hf_hub_download(
117
  repo_id=repo_id,
118
  repo_type="space",
119
- filename=TIMESTAMP_FILE_PATH,
120
  token=hf_token
121
  )
122
 
@@ -142,7 +127,6 @@ def get_latest_backup_info(repo_id, hf_token):
142
  def download_backup(repo_id, hf_token):
143
  """
144
  Download the encrypted database backup from HuggingFace.
145
- Handles the safe download and placement of the backup file.
146
 
147
  Args:
148
  repo_id (str): The HuggingFace Space ID
@@ -153,10 +137,11 @@ def download_backup(repo_id, hf_token):
153
  """
154
  try:
155
  print("Downloading encrypted database backup...")
 
156
  temp_file = hf_hub_download(
157
  repo_id=repo_id,
158
  repo_type="space",
159
- filename=DB_GPG_PATH,
160
  token=hf_token
161
  )
162
 
@@ -208,6 +193,7 @@ def decrypt_database(passphrase):
208
  def verify_database():
209
  """
210
  Verify the integrity of the restored database using SQLite's built-in checks.
 
211
 
212
  Returns:
213
  bool: True if database is valid or doesn't exist, False if corrupted
@@ -217,24 +203,45 @@ def verify_database():
217
 
218
  try:
219
  print("Verifying database integrity...")
220
- verify_cmd = ["sqlite3", DB_FILE_PATH, "PRAGMA integrity_check;"]
221
- result = subprocess.run(verify_cmd, capture_output=True, text=True, check=True)
222
-
223
- if "ok" in result.stdout.lower():
224
- print("Database integrity verified")
225
- return True
226
- else:
227
- print("Database integrity check failed")
228
- return False
229
 
230
- except subprocess.CalledProcessError as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  print(f"Database verification failed: {e}")
232
  return False
 
 
 
233
 
234
  def restore_db():
235
  """
236
- Main function that orchestrates the database restoration process.
237
- Performs all necessary checks and handles the complete restore operation.
 
 
 
 
 
238
 
239
  Returns:
240
  bool: True if restore is successful or no backup needed, False on error
@@ -252,10 +259,6 @@ def restore_db():
252
  hf_token = os.environ["HF_TOKEN"]
253
  space_id = os.environ["SPACE_ID"]
254
 
255
- # Validate repository access first
256
- if not validate_repo_access(space_id, hf_token):
257
- return False
258
-
259
  # Check if there's a backup in the repository
260
  backup_exists, timestamp = get_latest_backup_info(space_id, hf_token)
261
 
 
3
  import sys
4
  import subprocess
5
  import datetime
6
+ import sqlite3
7
  from pathlib import Path
8
  from huggingface_hub import HfApi, hf_hub_download
9
 
10
+ # Just like in backup.py, we establish absolute paths based on the script's location
11
+ # This ensures consistent path handling regardless of where the script is called from
12
+ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
13
+ BASE_DIR = os.path.dirname(SCRIPT_DIR)
14
+
15
+ # Define all paths as absolute paths relative to our base directory
16
+ # This maintains consistency with backup.py and prevents path-related issues
17
+ TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
18
+ DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
19
+ DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
20
 
21
  def check_requirements():
22
  """
 
27
  bool: True if GPG is available, False otherwise
28
  """
29
  try:
30
+ # Test GPG availability by checking its version
31
  subprocess.run(["gpg", "--version"], check=True, capture_output=True)
32
  return True
33
  except (subprocess.CalledProcessError, FileNotFoundError):
 
36
 
37
  def validate_secrets():
38
  """
39
+ Ensure all required environment variables are set.
40
+ These variables are essential for accessing and decrypting the backup.
41
 
42
  Returns:
43
  bool: True if all required variables are set, False otherwise
 
50
  return False
51
  return True
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  def ensure_directories():
54
  """
55
+ Create necessary directories for database and backup files.
56
+ This ensures we have the proper directory structure before any operations.
57
 
58
  Returns:
59
  bool: True if directories are created successfully, False otherwise
60
  """
61
  try:
62
+ # Create both backup and database directories with appropriate permissions
63
+ for directory in [os.path.dirname(DB_FILE_PATH), os.path.dirname(DB_GPG_PATH)]:
64
  os.makedirs(directory, mode=0o755, exist_ok=True)
65
  return True
66
  except Exception as e:
 
70
  def get_latest_backup_info(repo_id, hf_token):
71
  """
72
  Check for and retrieve information about the latest backup from HuggingFace.
 
73
 
74
  Args:
75
  repo_id (str): The HuggingFace Space ID
 
80
  """
81
  api = HfApi()
82
  try:
83
+ # Check if backup file exists in the repository
84
  files = api.list_repo_files(
85
  repo_id=repo_id,
86
  repo_type="space",
87
  token=hf_token
88
  )
89
 
90
+ # Look for backup file using relative path
91
+ relative_backup_path = os.path.relpath(DB_GPG_PATH, BASE_DIR)
92
+ backup_exists = relative_backup_path in files
93
 
94
  if not backup_exists:
95
  print("No backup file found in the repository")
 
97
 
98
  # If backup exists, try to get its timestamp
99
  try:
100
+ relative_timestamp_path = os.path.relpath(TIMESTAMP_FILE_PATH, BASE_DIR)
101
  timestamp_file = hf_hub_download(
102
  repo_id=repo_id,
103
  repo_type="space",
104
+ filename=relative_timestamp_path,
105
  token=hf_token
106
  )
107
 
 
127
  def download_backup(repo_id, hf_token):
128
  """
129
  Download the encrypted database backup from HuggingFace.
 
130
 
131
  Args:
132
  repo_id (str): The HuggingFace Space ID
 
137
  """
138
  try:
139
  print("Downloading encrypted database backup...")
140
+ relative_backup_path = os.path.relpath(DB_GPG_PATH, BASE_DIR)
141
  temp_file = hf_hub_download(
142
  repo_id=repo_id,
143
  repo_type="space",
144
+ filename=relative_backup_path,
145
  token=hf_token
146
  )
147
 
 
193
  def verify_database():
194
  """
195
  Verify the integrity of the restored database using SQLite's built-in checks.
196
+ This function uses Python's sqlite3 module instead of the command-line tool.
197
 
198
  Returns:
199
  bool: True if database is valid or doesn't exist, False if corrupted
 
203
 
204
  try:
205
  print("Verifying database integrity...")
206
+ with sqlite3.connect(DB_FILE_PATH) as conn:
207
+ cursor = conn.cursor()
 
 
 
 
 
 
 
208
 
209
+ # Run integrity check
210
+ cursor.execute("PRAGMA integrity_check;")
211
+ result = cursor.fetchone()[0]
212
+
213
+ # Verify basic schema structure
214
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
215
+ tables = cursor.fetchall()
216
+
217
+ if result.lower() == "ok" and len(tables) > 0:
218
+ print("Database integrity verified successfully")
219
+ print(f"Found {len(tables)} tables in database")
220
+ return True
221
+ else:
222
+ print("Database integrity check failed")
223
+ if result.lower() != "ok":
224
+ print(f"Integrity check result: {result}")
225
+ if len(tables) == 0:
226
+ print("No tables found in database")
227
+ return False
228
+
229
+ except sqlite3.Error as e:
230
  print(f"Database verification failed: {e}")
231
  return False
232
+ except Exception as e:
233
+ print(f"Unexpected error during database verification: {e}")
234
+ return False
235
 
236
  def restore_db():
237
  """
238
+ Main function to handle the database restoration process.
239
+ This function orchestrates the complete restore operation, including:
240
+ 1. Environment validation
241
+ 2. Directory creation
242
+ 3. Backup download
243
+ 4. Decryption
244
+ 5. Database verification
245
 
246
  Returns:
247
  bool: True if restore is successful or no backup needed, False on error
 
259
  hf_token = os.environ["HF_TOKEN"]
260
  space_id = os.environ["SPACE_ID"]
261
 
 
 
 
 
262
  # Check if there's a backup in the repository
263
  backup_exists, timestamp = get_latest_backup_info(space_id, hf_token)
264