thryyyyy committed on
Commit
47ac644
·
1 Parent(s): 9e206b4

more fixes

Browse files
Files changed (1) hide show
  1. backend/scripts/backup.py +146 -52
backend/scripts/backup.py CHANGED
@@ -1,110 +1,204 @@
1
  #!/usr/bin/env python3
2
  import os
 
3
  import subprocess
4
  import datetime
 
5
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
6
 
7
- TIMESTAMP_FILE_PATH = "db_backup/last_backup_time.txt"
8
- DB_GPG_PATH = "db_backup/webui.db.gpg"
9
- DB_FILE_PATH = "data/webui.db"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
  def get_last_backup_time(repo_id, hf_token):
12
  """
13
- Attempt to download and parse the last_backup_time.txt file
14
- from the HF Space. If it doesn't exist, return None.
 
 
 
 
 
 
 
15
  """
16
  api = HfApi()
17
  try:
18
- temp_file = hf_hub_download(
 
19
  repo_id=repo_id,
20
  repo_type="space",
21
- filename=TIMESTAMP_FILE_PATH,
22
  token=hf_token
23
  )
24
- except Exception as e:
25
- print(f"Could not download {TIMESTAMP_FILE_PATH}: {e}")
26
- return None
 
 
27
 
28
- try:
 
 
 
 
 
 
 
29
  with open(temp_file, "r", encoding="utf-8") as f:
30
  timestamp_str = f.read().strip()
31
- last_backup_dt = datetime.datetime.fromisoformat(timestamp_str)
32
- return last_backup_dt
33
  except Exception as e:
34
- print(f"Error parsing timestamp from {TIMESTAMP_FILE_PATH}: {e}")
35
  return None
36
 
37
  def save_timestamp_locally():
38
- """Save the current UTC time to db_backup/last_backup_time.txt (locally)."""
39
- now = datetime.datetime.utcnow()
40
- os.makedirs("db_backup", exist_ok=True)
41
- with open(TIMESTAMP_FILE_PATH, "w", encoding="utf-8") as f:
42
- f.write(now.isoformat())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  def backup_db():
45
- # Get environment variables
 
 
 
 
46
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
47
  hf_token = os.environ.get("HF_TOKEN")
48
  space_id = os.environ.get("SPACE_ID")
49
 
50
- # Get threshold from environment variable, defaulting to 2 hours if not set
51
- # For testing, set BACKUP_THRESHOLD_MINUTES=0 to disable the threshold check
 
 
 
 
 
 
 
52
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
53
 
54
- if not passphrase:
55
- raise ValueError("BACKUP_PASSPHRASE is not set.")
56
- if not hf_token:
57
- raise ValueError("HF_TOKEN is not set.")
58
- if not space_id:
59
- raise ValueError("SPACE_ID is not set (or define repo_id manually).")
60
-
61
  # Check if backup is needed based on threshold
62
- if threshold_minutes > 0: # Only check if threshold is positive
63
  last_backup_dt = get_last_backup_time(space_id, hf_token)
64
  if last_backup_dt is not None:
65
- now = datetime.datetime.utcnow()
 
 
66
  elapsed = now - last_backup_dt
67
  if elapsed.total_seconds() < threshold_minutes * 60:
68
  print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago.")
69
  print(f"Threshold is {threshold_minutes} minutes.")
70
  print("Skipping backup to avoid rebuild loop.")
71
- return
72
  else:
73
  print("Backup threshold check disabled for testing")
74
 
 
 
 
 
 
75
  # Proceed with backup...
76
  print("Encrypting database with GPG...")
77
  encrypt_cmd = [
78
- "gpg", "--batch", "--yes", "--passphrase", passphrase,
79
- "-c", "--cipher-algo", "AES256",
 
 
 
 
80
  "-o", DB_GPG_PATH,
81
  DB_FILE_PATH
82
  ]
83
- subprocess.run(encrypt_cmd, check=True)
 
 
 
 
 
 
84
  print(f"Database encrypted successfully to {DB_GPG_PATH}")
85
 
86
  # Update the timestamp file locally
87
- save_timestamp_locally()
 
88
 
89
  # Upload to Hugging Face Spaces
90
  print("Uploading to Hugging Face Spaces...")
91
  api = HfApi()
92
- repo_id = space_id
93
-
94
- # Create operations list for both files
95
- operations = [
96
- CommitOperationAdd(path_in_repo=DB_GPG_PATH, path_or_fileobj=DB_GPG_PATH),
97
- CommitOperationAdd(path_in_repo=TIMESTAMP_FILE_PATH, path_or_fileobj=TIMESTAMP_FILE_PATH)
98
- ]
 
 
 
 
 
 
99
 
100
- api.create_commit(
101
- repo_id=repo_id,
102
- repo_type="space",
103
- operations=operations,
104
- commit_message="Update encrypted database backup + timestamp",
105
- token=hf_token
106
- )
107
- print("DB backup + timestamp uploaded successfully!")
 
 
 
 
 
108
 
109
  if __name__ == "__main__":
110
- backup_db()
 
 
1
  #!/usr/bin/env python3
2
  import os
3
+ import sys
4
  import subprocess
5
  import datetime
6
+ from pathlib import Path
7
  from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
8
 
9
# Anchor every path to this script's location so the backup behaves the same
# regardless of the current working directory.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
# The directory that contains the scripts folder.
BASE_DIR = os.path.dirname(SCRIPT_DIR)

# Timestamp of the most recent backup (ISO 8601 text).
TIMESTAMP_FILE_PATH = os.path.join(BASE_DIR, "db_backup/last_backup_time.txt")
# GPG-encrypted copy of the database.
DB_GPG_PATH = os.path.join(BASE_DIR, "db_backup/webui.db.gpg")
# The live database that gets backed up.
DB_FILE_PATH = os.path.join(BASE_DIR, "data/webui.db")
15
+
16
def ensure_directories():
    """
    Make sure the folders holding the encrypted backup and the database exist.

    Returns:
        bool: True when both directories exist or were created, False if
        creating either of them raised an exception.
    """
    try:
        required_dirs = (
            os.path.dirname(DB_GPG_PATH),
            os.path.dirname(DB_FILE_PATH),
        )
        for target in required_dirs:
            # exist_ok lets repeated runs succeed when the folder is already there.
            os.makedirs(target, mode=0o755, exist_ok=True)
    except Exception as err:
        print(f"Error creating directories: {err}")
        return False
    return True
29
 
30
def get_last_backup_time(repo_id, hf_token):
    """
    Retrieve the timestamp of the last backup from the HuggingFace Space.

    The repository file listing is checked first so that a missing timestamp
    file is reported as "no previous backup" instead of a download error.

    Args:
        repo_id (str): The HuggingFace Space ID.
        hf_token (str): The HuggingFace API token.

    Returns:
        datetime or None: The parsed timestamp of the last backup, or None if
        the file is absent or anything goes wrong while fetching/parsing it.
        The value is timezone-naive when the stored string has no UTC offset.
    """
    api = HfApi()
    try:
        # Repository paths always use forward slashes; as_posix() keeps the
        # lookup correct even where os.sep is a backslash.
        repo_timestamp_path = (
            Path(TIMESTAMP_FILE_PATH).relative_to(BASE_DIR).as_posix()
        )

        repo_files = api.list_repo_files(
            repo_id=repo_id,
            repo_type="space",
            token=hf_token,
        )
        if repo_timestamp_path not in repo_files:
            print("No timestamp file found in repository")
            return None

        # Download and parse the timestamp file.
        local_copy = hf_hub_download(
            repo_id=repo_id,
            repo_type="space",
            filename=repo_timestamp_path,
            token=hf_token,
        )
        with open(local_copy, "r", encoding="utf-8") as f:
            timestamp_str = f.read().strip()
        return datetime.datetime.fromisoformat(timestamp_str)

    except Exception as e:
        # Best effort: any failure is treated as "no known previous backup".
        print(f"Error getting last backup time: {e}")
        return None
71
 
72
def save_timestamp_locally():
    """
    Record the current UTC time, in ISO 8601 form, in the local timestamp file.

    The timestamp file's parent directory is created when it is missing.

    Returns:
        bool: True if the timestamp was written, False if any step raised.
    """
    try:
        stamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
        os.makedirs(os.path.dirname(TIMESTAMP_FILE_PATH), exist_ok=True)
        with open(TIMESTAMP_FILE_PATH, "w", encoding="utf-8") as handle:
            handle.write(stamp)
    except Exception as err:
        print(f"Error saving timestamp: {err}")
        return False
    return True
86
+
87
def verify_database():
    """
    Verify the database exists and passes SQLite's integrity check.

    This prevents us from backing up corrupted data. The check shells out to
    the ``sqlite3`` command-line tool and runs ``PRAGMA integrity_check``.

    Returns:
        bool: True only when the database file exists and the integrity check
        reports "ok"; False if the file is missing, the ``sqlite3`` tool
        cannot be run or exits with an error, or problems are reported.
    """
    if not os.path.exists(DB_FILE_PATH):
        print(f"Database file not found at: {DB_FILE_PATH}")
        return False

    verify_cmd = ["sqlite3", DB_FILE_PATH, "PRAGMA integrity_check;"]
    try:
        result = subprocess.run(verify_cmd, capture_output=True, text=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the sqlite3 executable being missing or not runnable.
        print(f"Database verification failed: {e}")
        return False

    # A healthy database yields the single value "ok". Compare the final
    # output line exactly: a substring test would also match corruption
    # reports that merely contain the letters "ok" (e.g. a table named "book").
    report_lines = [line.strip() for line in result.stdout.splitlines() if line.strip()]
    if not report_lines or report_lines[-1].lower() != "ok":
        print(f"Database integrity check reported problems: {result.stdout.strip()}")
        return False
    return True
103
 
104
def _read_threshold_minutes(default=120):
    """Return BACKUP_THRESHOLD_MINUTES as an int, or None if it is not an integer."""
    raw = os.environ.get("BACKUP_THRESHOLD_MINUTES", default)
    try:
        return int(raw)
    except (TypeError, ValueError):
        print(f"Error: BACKUP_THRESHOLD_MINUTES must be an integer, got {raw!r}")
        return None


def _backed_up_recently(space_id, hf_token, threshold_minutes):
    """Return True when the last recorded backup is younger than the threshold."""
    last_backup_dt = get_last_backup_time(space_id, hf_token)
    if last_backup_dt is None:
        return False

    now = datetime.datetime.now(datetime.timezone.utc)
    # Timestamps stored without an offset are interpreted as UTC so they can
    # be subtracted from the timezone-aware "now".
    if not last_backup_dt.tzinfo:
        last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)

    elapsed = now - last_backup_dt
    if elapsed.total_seconds() >= threshold_minutes * 60:
        return False

    print(f"Last backup was only {elapsed.total_seconds()/3600:.2f} hours ago.")
    print(f"Threshold is {threshold_minutes} minutes.")
    print("Skipping backup to avoid rebuild loop.")
    return True


def _encrypt_database(passphrase):
    """Symmetrically encrypt the database to DB_GPG_PATH with GPG; return success."""
    encrypt_cmd = [
        "gpg",
        "--batch",
        "--yes",
        # The passphrase is fed on stdin instead of argv so it is not visible
        # in the process list or in the error message printed on failure.
        "--passphrase-fd", "0",
        "-c",
        "--cipher-algo", "AES256",
        "-o", DB_GPG_PATH,
        DB_FILE_PATH,
    ]
    try:
        subprocess.run(encrypt_cmd, input=passphrase, text=True, check=True)
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers the gpg executable being missing or not runnable.
        print(f"Encryption failed: {e}")
        return False
    return True


def _upload_backup(space_id, hf_token):
    """Commit the encrypted database and the timestamp file to the Space; return success."""
    api = HfApi()
    try:
        # Both files go up in a single commit so they cannot get out of sync.
        # Repository paths use forward slashes, hence as_posix().
        operations = [
            CommitOperationAdd(
                path_in_repo=Path(DB_GPG_PATH).relative_to(BASE_DIR).as_posix(),
                path_or_fileobj=DB_GPG_PATH,
            ),
            CommitOperationAdd(
                path_in_repo=Path(TIMESTAMP_FILE_PATH).relative_to(BASE_DIR).as_posix(),
                path_or_fileobj=TIMESTAMP_FILE_PATH,
            ),
        ]
        api.create_commit(
            repo_id=space_id,
            repo_type="space",
            operations=operations,
            commit_message="Update encrypted database backup + timestamp",
            token=hf_token,
        )
    except Exception as e:
        print(f"Error uploading to HuggingFace: {e}")
        return False

    print("DB backup + timestamp uploaded successfully!")
    return True


def backup_db():
    """
    Main function to handle the database backup process.

    Steps: validate configuration, create the working directories, skip if a
    recent backup exists, verify the database, encrypt it with GPG, write the
    timestamp file, and upload both files to the HuggingFace Space.

    Environment variables:
        BACKUP_PASSPHRASE: passphrase for the symmetric GPG encryption.
        HF_TOKEN: HuggingFace API token.
        SPACE_ID: ID of the Space that receives the backup.
        BACKUP_THRESHOLD_MINUTES: minimum minutes between backups (default
            120); a value of 0 or less disables the check.

    Returns:
        bool: True if the backup was uploaded or deliberately skipped because
        the last one is recent enough; False on any configuration or
        processing failure.
    """
    passphrase = os.environ.get("BACKUP_PASSPHRASE")
    hf_token = os.environ.get("HF_TOKEN")
    space_id = os.environ.get("SPACE_ID")

    # Report exactly which settings are absent to make misconfiguration obvious.
    missing = [
        name
        for name, value in (
            ("BACKUP_PASSPHRASE", passphrase),
            ("HF_TOKEN", hf_token),
            ("SPACE_ID", space_id),
        )
        if not value
    ]
    if missing:
        print(f"Error: Missing required environment variables: {', '.join(missing)}")
        return False

    # Validate all configuration before touching the filesystem.
    threshold_minutes = _read_threshold_minutes()
    if threshold_minutes is None:
        return False

    # Create necessary directories first
    if not ensure_directories():
        return False

    # Check if backup is needed based on threshold
    if threshold_minutes > 0:
        if _backed_up_recently(space_id, hf_token, threshold_minutes):
            return True
    else:
        print("Backup threshold check disabled for testing")

    # Verify database before backing up
    if not verify_database():
        print("Database verification failed, aborting backup")
        return False

    print("Encrypting database with GPG...")
    if not _encrypt_database(passphrase):
        return False
    print(f"Database encrypted successfully to {DB_GPG_PATH}")

    # Update the timestamp file locally
    if not save_timestamp_locally():
        return False

    print("Uploading to Hugging Face Spaces...")
    return _upload_backup(space_id, hf_token)
201
 
202
if __name__ == "__main__":
    # Map the boolean outcome to a process exit status: 0 on success, 1 on failure.
    sys.exit(0 if backup_db() else 1)