triflix committed on
Commit
43f0301
·
verified ·
1 Parent(s): cfcea40

Upload 21 files

Browse files
app/admin/router.py CHANGED
@@ -27,7 +27,7 @@ import secrets
27
  from app.database import get_db, DB_PATH, integrity_check
28
  from app.backup import (
29
  create_snapshot, create_compressed_snapshot, create_vacuum_snapshot,
30
- upload_to_hf_bucket, list_backups, restore_from_hf_bucket
31
  )
32
 
33
  router = APIRouter(prefix="/admin", tags=["admin"])
@@ -165,11 +165,13 @@ async def run_query(request: Request, sql: str = Form(...), _=Depends(verify_adm
165
  async def backups_page(request: Request, _=Depends(verify_admin)):
166
  backup_info = list_backups()
167
  health = integrity_check()
 
168
 
169
  return templates.TemplateResponse("backups.html", {
170
  "request": request,
171
  "backups": backup_info,
172
  "health": health,
 
173
  })
174
 
175
  @router.post("/backups/create")
@@ -185,6 +187,8 @@ async def create_backup(request: Request, backup_type: str = Form("snapshot"), _
185
  else:
186
  result = {"status": "error", "message": f"Unknown type: {backup_type}"}
187
 
 
 
188
  return RedirectResponse(url="/admin/backups", status_code=303)
189
 
190
  @router.get("/backups/download/{filename}")
@@ -213,3 +217,20 @@ async def export_table(table_name: str, format: str = "json", _=Depends(verify_a
213
  conn = get_db()
214
  rows = conn.execute(f"SELECT * FROM [{table_name}]").fetchall()
215
  return [dict(r) for r in rows]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  from app.database import get_db, DB_PATH, integrity_check
28
  from app.backup import (
29
  create_snapshot, create_compressed_snapshot, create_vacuum_snapshot,
30
+ upload_to_hf_bucket, list_backups, restore_from_hf_bucket, diagnose_hf_setup
31
  )
32
 
33
  router = APIRouter(prefix="/admin", tags=["admin"])
 
165
async def backups_page(request: Request, _=Depends(verify_admin)):
    """
    Render the admin backups page.

    Collects the local snapshot listing (list_backups), the database
    integrity result (integrity_check) and the HF diagnostics report
    (diagnose_hf_setup) and passes them to the "backups.html" template
    as "backups", "health" and "hf_diagnosis".

    Access is gated by the verify_admin dependency.
    """
    import asyncio

    backup_info = list_backups()
    health = integrity_check()

    # diagnose_hf_setup() performs blocking HTTP calls (whoami, repo_exists).
    # Run it in the default executor so a slow or unreachable HF API does not
    # stall the event loop for every other request.
    loop = asyncio.get_running_loop()
    hf_diagnosis = await loop.run_in_executor(None, diagnose_hf_setup)

    return templates.TemplateResponse("backups.html", {
        "request": request,
        "backups": backup_info,
        "health": health,
        "hf_diagnosis": hf_diagnosis,
    })
176
 
177
  @router.post("/backups/create")
 
187
  else:
188
  result = {"status": "error", "message": f"Unknown type: {backup_type}"}
189
 
190
+ # Store result in session/cookie for display
191
+ # For simplicity, we just redirect - the result is logged
192
  return RedirectResponse(url="/admin/backups", status_code=303)
193
 
194
  @router.get("/backups/download/{filename}")
 
217
  conn = get_db()
218
  rows = conn.execute(f"SELECT * FROM [{table_name}]").fetchall()
219
  return [dict(r) for r in rows]
220
+
221
+ # ── DIAGNOSTICS ──
222
@router.get("/api/diagnostics")
async def api_diagnostics(_=Depends(verify_admin)):
    """
    Run HF diagnostics - check credentials and repo access.

    Returns the report dict produced by diagnose_hf_setup(). Access is
    gated by the verify_admin dependency.
    """
    import asyncio

    # diagnose_hf_setup() makes blocking HTTP calls to the HF API; keep them
    # off the event loop thread.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, diagnose_hf_setup)
226
+
227
@router.post("/api/backup/upload")
async def api_backup_upload(_=Depends(verify_admin)):
    """
    Trigger an upload to the HF bucket and return its status dict.

    Delegates to upload_to_hf_bucket() with no arguments. Access is gated
    by the verify_admin dependency.
    """
    # NOTE(review): the upload runs synchronously inside this coroutine.
    upload_result = upload_to_hf_bucket()
    return upload_result
231
+
232
@router.post("/api/backup/test")
async def api_backup_test(_=Depends(verify_admin)):
    """
    Create a compressed snapshot and return the resulting status dict.

    Delegates to create_compressed_snapshot(). Access is gated by the
    verify_admin dependency.
    """
    return create_compressed_snapshot()
app/admin/templates/backups.html CHANGED
@@ -7,6 +7,23 @@
7
  <a href="/admin/dashboard" class="btn-secondary">← Back to Dashboard</a>
8
  </div>
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  <!-- Health Status -->
11
  <div class="card">
12
  <h2>Database Status</h2>
@@ -53,7 +70,11 @@
53
  <!-- Local Snapshots -->
54
  <div class="card">
55
  <h2>πŸ“ Local Snapshots</h2>
56
- <p class="info-text">Directory: <code>{{ backups.snapshot_dir }}</code></p>
 
 
 
 
57
 
58
  {% if backups.local_snapshots %}
59
  <table class="data-table">
@@ -83,6 +104,17 @@
83
  {% endif %}
84
  </div>
85
 
 
 
 
 
 
 
 
 
 
 
 
86
  <!-- Backup Strategy Info -->
87
  <div class="card">
88
  <h2>πŸ“š 3-Layer Backup Strategy</h2>
 
7
  <a href="/admin/dashboard" class="btn-secondary">← Back to Dashboard</a>
8
  </div>
9
 
10
+ <!-- HF Diagnostics -->
11
+ <div class="card {% if '❌' in hf_diagnosis.overall_status %}error-card{% endif %}">
12
+ <h2>🔧 HF Setup Diagnostics</h2>
13
+ <div class="diagnostics">
14
+ {% for check in hf_diagnosis.checks %}
15
+ <div class="diag-item">
16
+ <span class="diag-name">{{ check.name }}</span>
17
+ <span class="diag-status">{{ check.status }}</span>
18
+ <span class="diag-value">{{ check.value }}</span>
19
+ </div>
20
+ {% endfor %}
21
+ <div class="diag-overall">
22
+ <strong>{{ hf_diagnosis.overall_status }}</strong>
23
+ </div>
24
+ </div>
25
+ </div>
26
+
27
  <!-- Health Status -->
28
  <div class="card">
29
  <h2>Database Status</h2>
 
70
  <!-- Local Snapshots -->
71
  <div class="card">
72
  <h2>📁 Local Snapshots</h2>
73
+ <p class="info-text">
74
+ Directory: <code>{{ backups.snapshot_dir }}</code><br>
75
+ HF Bucket Repo: <code>{{ backups.hf_bucket_repo }}</code><br>
76
+ HF Token: <code>{% if backups.hf_token_set %}✅ Set{% else %}❌ Not Set{% endif %}</code>
77
+ </p>
78
 
79
  {% if backups.local_snapshots %}
80
  <table class="data-table">
 
104
  {% endif %}
105
  </div>
106
 
107
+ <!-- Quick API Test -->
108
+ <div class="card">
109
+ <h2>🧪 Quick Test</h2>
110
+ <p class="info-text">Use these API endpoints to test backup functionality:</p>
111
+ <div class="quick-queries">
112
+ <code>GET /admin/api/diagnostics</code> - Check HF setup<br>
113
+ <code>POST /admin/api/backup/test</code> - Test backup creation<br>
114
+ <code>POST /admin/api/backup/upload</code> - Upload to HF Bucket
115
+ </div>
116
+ </div>
117
+
118
  <!-- Backup Strategy Info -->
119
  <div class="card">
120
  <h2>📚 3-Layer Backup Strategy</h2>
app/backup.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- Backup Module β€” 3-Layer Backup Strategy
3
- ────────────────────────────────────────
4
  Layer 1: Litestream β†’ local file replica (every 1s, automatic)
5
  Layer 2: Python sqlite3.backup() β†’ snapshot to /tmp (on-demand + scheduled)
6
  Layer 3: Upload snapshot to HF Bucket (persistent, survives restarts)
@@ -20,7 +20,7 @@ import shutil
20
  import logging
21
  from datetime import datetime, timedelta
22
  from pathlib import Path
23
- from huggingface_hub import HfApi, hf_hub_download
24
  from contextlib import contextmanager
25
 
26
  logger = logging.getLogger(__name__)
@@ -28,12 +28,16 @@ logger = logging.getLogger(__name__)
28
  DB_PATH = "/tmp/data/app.db"
29
  BACKUP_DIR = "/tmp/data/backups"
30
  SNAPSHOT_DIR = "/tmp/data/snapshots"
 
31
 
32
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
33
- HF_BUCKET_REPO = os.environ.get("HF_BUCKET_REPO", "") # "username/bucket-name"
34
 
 
35
  os.makedirs(BACKUP_DIR, exist_ok=True)
36
  os.makedirs(SNAPSHOT_DIR, exist_ok=True)
 
 
37
 
38
 
39
  # ═══════════════════════════════════════════════════════
@@ -50,7 +54,10 @@ def create_snapshot(label: str = "manual") -> dict:
50
  Source: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup
51
  """
52
  if not os.path.exists(DB_PATH):
53
- return {"status": "error", "message": "Database not found"}
 
 
 
54
 
55
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
56
  snapshot_name = f"app_{label}_{timestamp}.db"
@@ -94,29 +101,53 @@ def create_compressed_snapshot() -> dict:
94
 
95
  Source: https://litestream.io/alternatives/cron/
96
  """
97
- result = create_snapshot(label="compressed")
 
 
 
 
 
98
  if result["status"] != "success":
99
  return result
100
 
101
- snapshot_path = result["path"]
102
- gz_path = snapshot_path + ".gz"
103
-
104
- with open(snapshot_path, 'rb') as f_in:
105
- with gzip.open(gz_path, 'wb') as f_out:
106
- shutil.copyfileobj(f_in, f_out)
107
-
108
- # Remove uncompressed version
109
- os.remove(snapshot_path)
110
-
111
- gz_size = os.path.getsize(gz_path)
112
- result["compressed_path"] = gz_path
113
- result["compressed_size_bytes"] = gz_size
114
- result["compressed_size_mb"] = round(gz_size / (1024 * 1024), 2)
115
- result["compression_ratio"] = round(
116
- (1 - gz_size / result["size_bytes"]) * 100, 1
117
- )
118
 
119
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
 
122
  def create_vacuum_snapshot() -> dict:
@@ -158,6 +189,47 @@ def create_vacuum_snapshot() -> dict:
158
  # LAYER 3: HF Bucket Sync β€” Persistent Remote Storage
159
  # ═══════════════════════════════════════════════════════
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  def upload_to_hf_bucket(local_path: str = None) -> dict:
162
  """
163
  Upload a database snapshot to HuggingFace Bucket.
@@ -166,31 +238,46 @@ def upload_to_hf_bucket(local_path: str = None) -> dict:
166
 
167
  Source: https://huggingface.co/docs/huggingface_hub/guides/upload
168
  """
 
169
  if not HF_TOKEN or not HF_BUCKET_REPO:
170
- return {"status": "error", "message": "HF_TOKEN or HF_BUCKET_REPO not set"}
 
 
171
 
172
- api = HfApi(token=HF_TOKEN)
 
 
 
173
 
174
  # Create snapshot if not provided
175
  if local_path is None:
176
  result = create_compressed_snapshot()
177
  if result["status"] != "success":
178
  return result
179
- local_path = result.get("compressed_path", result["path"])
 
 
 
 
180
 
181
  filename = os.path.basename(local_path)
182
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
183
 
184
  try:
 
 
 
 
185
  # Upload latest (overwrites)
186
  api.upload_file(
187
  path_or_fileobj=local_path,
188
- path_in_repo=f"db-backups/latest.db.gz",
189
  repo_id=HF_BUCKET_REPO,
190
- repo_type="dataset", # Use dataset repo as bucket
191
  )
192
 
193
- # Also keep a timestamped copy (rolling 7-day)
 
194
  api.upload_file(
195
  path_or_fileobj=local_path,
196
  path_in_repo=f"db-backups/archive/{filename}",
@@ -198,16 +285,16 @@ def upload_to_hf_bucket(local_path: str = None) -> dict:
198
  repo_type="dataset",
199
  )
200
 
201
- file_size = os.path.getsize(local_path)
202
  _log_backup("hf_bucket", f"db-backups/{filename}", file_size, "success")
203
 
204
  logger.info(f"βœ… Uploaded to HF Bucket: {filename}")
205
- return {"status": "success", "remote_path": f"db-backups/{filename}"}
206
 
207
  except Exception as e:
208
- _log_backup("hf_bucket", local_path, 0, "error", str(e))
209
- logger.error(f"❌ HF Bucket upload failed: {e}")
210
- return {"status": "error", "message": str(e)}
 
211
 
212
 
213
  def restore_from_hf_bucket() -> dict:
@@ -219,26 +306,31 @@ def restore_from_hf_bucket() -> dict:
219
  """
220
  if not HF_TOKEN or not HF_BUCKET_REPO:
221
  logger.warning("⚠️ HF credentials not set, skipping restore")
222
- return {"status": "skipped", "message": "No HF credentials"}
223
 
224
- if os.path.exists(DB_PATH):
225
  logger.info("βœ… Database already exists, skipping restore")
226
  return {"status": "skipped", "message": "DB already exists"}
227
 
 
 
 
 
228
  try:
229
- api = HfApi(token=HF_TOKEN)
230
 
231
  # Download latest backup
232
  local_gz = hf_hub_download(
233
  repo_id=HF_BUCKET_REPO,
234
  filename="db-backups/latest.db.gz",
235
  repo_type="dataset",
236
- local_dir="/tmp/data/restore",
237
  token=HF_TOKEN,
238
  )
239
 
 
 
240
  # Decompress
241
- os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
242
  with gzip.open(local_gz, 'rb') as f_in:
243
  with open(DB_PATH, 'wb') as f_out:
244
  shutil.copyfileobj(f_in, f_out)
@@ -257,8 +349,14 @@ def restore_from_hf_bucket() -> dict:
257
  return {"status": "error", "message": f"Integrity check failed: {result[0]}"}
258
 
259
  except Exception as e:
260
- logger.info(f"πŸ“¦ No backup found in HF Bucket: {e}")
261
- return {"status": "not_found", "message": str(e)}
 
 
 
 
 
 
262
 
263
 
264
  # ═══════════════════════════════════════════════════════
@@ -271,22 +369,41 @@ async def scheduled_backup_task():
271
  Creates snapshot + uploads to HF Bucket.
272
  """
273
  import asyncio
 
 
 
 
274
  while True:
275
- await asyncio.sleep(1800) # 30 minutes
276
  try:
277
- upload_to_hf_bucket()
 
 
 
 
 
 
278
  cleanup_old_snapshots(max_age_hours=48)
279
  except Exception as e:
280
- logger.error(f"Scheduled backup error: {e}")
 
 
281
 
282
 
283
  def cleanup_old_snapshots(max_age_hours: int = 48):
284
  """Remove local snapshots older than max_age_hours."""
285
  cutoff = datetime.now() - timedelta(hours=max_age_hours)
 
286
  for f in Path(SNAPSHOT_DIR).glob("*.db*"):
287
- if datetime.fromtimestamp(f.stat().st_mtime) < cutoff:
288
- f.unlink()
289
- logger.info(f"πŸ—‘οΈ Removed old snapshot: {f.name}")
 
 
 
 
 
 
 
290
 
291
 
292
  def list_backups() -> dict:
@@ -294,14 +411,22 @@ def list_backups() -> dict:
294
  local_files = []
295
 
296
  for f in sorted(Path(SNAPSHOT_DIR).glob("*.db*"), key=lambda x: x.stat().st_mtime, reverse=True):
297
- local_files.append({
298
- "name": f.name,
299
- "size_bytes": f.stat().st_size,
300
- "size_mb": round(f.stat().st_size / (1024 * 1024), 2),
301
- "modified": datetime.fromtimestamp(f.stat().st_mtime).isoformat(),
302
- })
 
 
 
303
 
304
- return {"local_snapshots": local_files, "snapshot_dir": SNAPSHOT_DIR}
 
 
 
 
 
305
 
306
 
307
  # ── Internal helper ──
@@ -315,5 +440,90 @@ def _log_backup(backup_type, file_path, file_size, status, error=None):
315
  )
316
  conn.commit()
317
  conn.close()
318
- except:
319
- pass # Don't fail if logging fails
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ Backup Module — 3-Layer Backup Strategy (FIXED)
3
+ ────────────────────────────────────────────────
4
  Layer 1: Litestream → local file replica (every 1s, automatic)
5
  Layer 2: Python sqlite3.backup() → snapshot to /tmp (on-demand + scheduled)
6
  Layer 3: Upload snapshot to HF Bucket (persistent, survives restarts)
 
20
  import logging
21
  from datetime import datetime, timedelta
22
  from pathlib import Path
23
+ from huggingface_hub import HfApi, hf_hub_download, create_repo, repo_exists
24
  from contextlib import contextmanager
25
 
26
  logger = logging.getLogger(__name__)
 
28
  DB_PATH = "/tmp/data/app.db"
29
  BACKUP_DIR = "/tmp/data/backups"
30
  SNAPSHOT_DIR = "/tmp/data/snapshots"
31
+ RESTORE_DIR = "/tmp/data/restore"
32
 
33
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
34
+ HF_BUCKET_REPO = os.environ.get("HF_BUCKET_REPO", "") # "username/repo-name"
35
 
36
+ # Ensure all directories exist
37
  os.makedirs(BACKUP_DIR, exist_ok=True)
38
  os.makedirs(SNAPSHOT_DIR, exist_ok=True)
39
+ os.makedirs(RESTORE_DIR, exist_ok=True)
40
+ os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
41
 
42
 
43
  # ═══════════════════════════════════════════════════════
 
54
  Source: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup
55
  """
56
  if not os.path.exists(DB_PATH):
57
+ logger.warning(f"Database not found at {DB_PATH}, creating empty database first")
58
+ # Create empty database with schema
59
+ from app.database import init_db
60
+ init_db()
61
 
62
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
63
  snapshot_name = f"app_{label}_{timestamp}.db"
 
101
 
102
  Source: https://litestream.io/alternatives/cron/
103
  """
104
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
105
+ snapshot_name = f"app_compressed_{timestamp}.db.gz"
106
+ gz_path = os.path.join(SNAPSHOT_DIR, snapshot_name)
107
+
108
+ # Create uncompressed snapshot first (temp)
109
+ result = create_snapshot(label="temp_for_compress")
110
  if result["status"] != "success":
111
  return result
112
 
113
+ temp_snapshot_path = result["path"]
114
+ original_size = result["size_bytes"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
+ try:
117
+ # Compress directly to final location
118
+ with open(temp_snapshot_path, 'rb') as f_in:
119
+ with gzip.open(gz_path, 'wb') as f_out:
120
+ shutil.copyfileobj(f_in, f_out)
121
+
122
+ # Remove temp uncompressed file
123
+ if os.path.exists(temp_snapshot_path):
124
+ os.remove(temp_snapshot_path)
125
+
126
+ gz_size = os.path.getsize(gz_path)
127
+
128
+ _log_backup("compressed_snapshot", gz_path, gz_size, "success")
129
+
130
+ logger.info(f"βœ… Compressed snapshot created: {snapshot_name} ({gz_size} bytes, {round((1 - gz_size/original_size)*100, 1)}% reduction)")
131
+
132
+ return {
133
+ "status": "success",
134
+ "file": snapshot_name,
135
+ "path": gz_path, # This is now the .gz file
136
+ "size_bytes": original_size,
137
+ "size_mb": round(original_size / (1024 * 1024), 2),
138
+ "compressed_size_bytes": gz_size,
139
+ "compressed_size_mb": round(gz_size / (1024 * 1024), 2),
140
+ "compression_ratio": round((1 - gz_size / original_size) * 100, 1),
141
+ }
142
+ except Exception as e:
143
+ # Clean up on failure
144
+ if os.path.exists(temp_snapshot_path):
145
+ os.remove(temp_snapshot_path)
146
+ if os.path.exists(gz_path):
147
+ os.remove(gz_path)
148
+ _log_backup("compressed_snapshot", gz_path, 0, "error", str(e))
149
+ logger.error(f"❌ Compressed snapshot failed: {e}")
150
+ return {"status": "error", "message": str(e)}
151
 
152
 
153
  def create_vacuum_snapshot() -> dict:
 
189
  # LAYER 3: HF Bucket Sync β€” Persistent Remote Storage
190
  # ═══════════════════════════════════════════════════════
191
 
192
def ensure_hf_repo_exists() -> dict:
    """
    Ensure the HF dataset repository named by HF_BUCKET_REPO exists.

    Checks for the repository first and creates it as a private dataset
    repo when the check does not confirm it exists.

    Returns:
        A status dict: {"status": "success", "message": ...} when the repo
        exists or was created, {"status": "error", "message": ...} when
        HF_TOKEN / HF_BUCKET_REPO is unset or the creation call fails.
        This function does not raise; failures are reported in the dict.
    """
    if not HF_TOKEN:
        return {"status": "error", "message": "HF_TOKEN not set"}

    if not HF_BUCKET_REPO:
        return {"status": "error", "message": "HF_BUCKET_REPO not set"}

    try:
        # The existence probe is best-effort: if it fails we still try to
        # create the repo (create_repo is called with exist_ok=True), but
        # the failure is logged instead of being silently discarded.
        try:
            if repo_exists(repo_id=HF_BUCKET_REPO, repo_type="dataset", token=HF_TOKEN):
                logger.info(f"βœ… HF repo exists: {HF_BUCKET_REPO}")
                return {"status": "success", "message": "Repo exists"}
        except Exception as probe_error:
            logger.warning(
                f"HF repo existence check failed, attempting create: {probe_error}"
            )

        # Create repo if it doesn't exist
        logger.info(f"Creating HF dataset repo: {HF_BUCKET_REPO}")
        create_repo(
            repo_id=HF_BUCKET_REPO,
            repo_type="dataset",
            private=True,
            exist_ok=True,
            token=HF_TOKEN,
        )
        logger.info(f"βœ… Created HF dataset repo: {HF_BUCKET_REPO}")
        return {"status": "success", "message": "Repo created"}

    except Exception as e:
        logger.error(f"❌ Failed to ensure HF repo exists: {e}")
        return {"status": "error", "message": str(e)}
231
+
232
+
233
  def upload_to_hf_bucket(local_path: str = None) -> dict:
234
  """
235
  Upload a database snapshot to HuggingFace Bucket.
 
238
 
239
  Source: https://huggingface.co/docs/huggingface_hub/guides/upload
240
  """
241
+ # Check credentials
242
  if not HF_TOKEN or not HF_BUCKET_REPO:
243
+ msg = f"HF_TOKEN={'set' if HF_TOKEN else 'NOT SET'}, HF_BUCKET_REPO={'set' if HF_BUCKET_REPO else 'NOT SET'}"
244
+ logger.error(f"❌ HF credentials missing: {msg}")
245
+ return {"status": "error", "message": f"Credentials not configured: {msg}"}
246
 
247
+ # Ensure repo exists
248
+ repo_status = ensure_hf_repo_exists()
249
+ if repo_status["status"] != "success":
250
+ return repo_status
251
 
252
  # Create snapshot if not provided
253
  if local_path is None:
254
  result = create_compressed_snapshot()
255
  if result["status"] != "success":
256
  return result
257
+ local_path = result["path"] # Now correctly points to .gz file
258
+
259
+ # Verify file exists
260
+ if not os.path.exists(local_path):
261
+ return {"status": "error", "message": f"Local file not found: {local_path}"}
262
 
263
  filename = os.path.basename(local_path)
264
+ file_size = os.path.getsize(local_path)
265
 
266
  try:
267
+ api = HfApi(token=HF_TOKEN)
268
+
269
+ logger.info(f"πŸ“€ Uploading to HF Bucket: {filename} ({file_size} bytes)")
270
+
271
  # Upload latest (overwrites)
272
  api.upload_file(
273
  path_or_fileobj=local_path,
274
+ path_in_repo="db-backups/latest.db.gz",
275
  repo_id=HF_BUCKET_REPO,
276
+ repo_type="dataset",
277
  )
278
 
279
+ # Also keep a timestamped copy (rolling archive)
280
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
281
  api.upload_file(
282
  path_or_fileobj=local_path,
283
  path_in_repo=f"db-backups/archive/{filename}",
 
285
  repo_type="dataset",
286
  )
287
 
 
288
  _log_backup("hf_bucket", f"db-backups/{filename}", file_size, "success")
289
 
290
  logger.info(f"βœ… Uploaded to HF Bucket: {filename}")
291
+ return {"status": "success", "remote_path": f"db-backups/{filename}", "size_bytes": file_size}
292
 
293
  except Exception as e:
294
+ error_msg = str(e)
295
+ _log_backup("hf_bucket", local_path, 0, "error", error_msg)
296
+ logger.error(f"❌ HF Bucket upload failed: {error_msg}")
297
+ return {"status": "error", "message": error_msg}
298
 
299
 
300
  def restore_from_hf_bucket() -> dict:
 
306
  """
307
  if not HF_TOKEN or not HF_BUCKET_REPO:
308
  logger.warning("⚠️ HF credentials not set, skipping restore")
309
+ return {"status": "skipped", "message": "No HF credentials configured"}
310
 
311
+ if os.path.exists(DB_PATH) and os.path.getsize(DB_PATH) > 0:
312
  logger.info("βœ… Database already exists, skipping restore")
313
  return {"status": "skipped", "message": "DB already exists"}
314
 
315
+ # Ensure restore directory exists
316
+ os.makedirs(RESTORE_DIR, exist_ok=True)
317
+ os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
318
+
319
  try:
320
+ logger.info(f"πŸ“₯ Attempting to restore from HF Bucket: {HF_BUCKET_REPO}")
321
 
322
  # Download latest backup
323
  local_gz = hf_hub_download(
324
  repo_id=HF_BUCKET_REPO,
325
  filename="db-backups/latest.db.gz",
326
  repo_type="dataset",
327
+ local_dir=RESTORE_DIR,
328
  token=HF_TOKEN,
329
  )
330
 
331
+ logger.info(f"βœ… Downloaded backup: {local_gz}")
332
+
333
  # Decompress
 
334
  with gzip.open(local_gz, 'rb') as f_in:
335
  with open(DB_PATH, 'wb') as f_out:
336
  shutil.copyfileobj(f_in, f_out)
 
349
  return {"status": "error", "message": f"Integrity check failed: {result[0]}"}
350
 
351
  except Exception as e:
352
+ error_msg = str(e)
353
+ # Check if it's a "file not found" error (first run)
354
+ if "404" in error_msg or "not found" in error_msg.lower():
355
+ logger.info("πŸ“¦ No backup found in HF Bucket (this is normal for first run)")
356
+ return {"status": "not_found", "message": "No backup exists yet - this is normal for first run"}
357
+ else:
358
+ logger.error(f"❌ Restore from HF Bucket failed: {error_msg}")
359
+ return {"status": "error", "message": error_msg}
360
 
361
 
362
  # ═══════════════════════════════════════════════════════
 
369
  Creates snapshot + uploads to HF Bucket.
370
  """
371
  import asyncio
372
+
373
+ # Wait a bit before first backup (let app stabilize)
374
+ await asyncio.sleep(60)
375
+
376
  while True:
 
377
  try:
378
+ logger.info("⏰ Running scheduled backup...")
379
+ result = upload_to_hf_bucket()
380
+ if result["status"] == "success":
381
+ logger.info("βœ… Scheduled backup completed successfully")
382
+ else:
383
+ logger.warning(f"⚠️ Scheduled backup completed with issues: {result.get('message', 'unknown')}")
384
+
385
  cleanup_old_snapshots(max_age_hours=48)
386
  except Exception as e:
387
+ logger.error(f"❌ Scheduled backup error: {e}")
388
+
389
+ await asyncio.sleep(1800) # 30 minutes
390
 
391
 
392
def cleanup_old_snapshots(max_age_hours: int = 48):
    """
    Delete files matching "*.db*" in SNAPSHOT_DIR whose modification time
    is more than max_age_hours in the past.

    A failure on one file is logged as a warning and does not stop the
    sweep. A summary line is logged when at least one file was removed.
    """
    removed_count = 0
    oldest_allowed = datetime.now() - timedelta(hours=max_age_hours)

    for snapshot in Path(SNAPSHOT_DIR).glob("*.db*"):
        try:
            modified_at = datetime.fromtimestamp(snapshot.stat().st_mtime)
            if modified_at >= oldest_allowed:
                continue  # still inside the retention window
            snapshot.unlink()
            removed_count += 1
            logger.info(f"πŸ—‘οΈ Removed old snapshot: {snapshot.name}")
        except Exception as exc:
            logger.warning(f"Failed to remove {snapshot.name}: {exc}")

    if removed_count:
        logger.info(f"πŸ—‘οΈ Cleaned up {removed_count} old snapshots")
407
 
408
 
409
  def list_backups() -> dict:
 
411
  local_files = []
412
 
413
  for f in sorted(Path(SNAPSHOT_DIR).glob("*.db*"), key=lambda x: x.stat().st_mtime, reverse=True):
414
+ try:
415
+ local_files.append({
416
+ "name": f.name,
417
+ "size_bytes": f.stat().st_size,
418
+ "size_mb": round(f.stat().st_size / (1024 * 1024), 2),
419
+ "modified": datetime.fromtimestamp(f.stat().st_mtime).isoformat(),
420
+ })
421
+ except Exception:
422
+ pass
423
 
424
+ return {
425
+ "local_snapshots": local_files,
426
+ "snapshot_dir": SNAPSHOT_DIR,
427
+ "hf_bucket_repo": HF_BUCKET_REPO if HF_BUCKET_REPO else "Not configured",
428
+ "hf_token_set": bool(HF_TOKEN),
429
+ }
430
 
431
 
432
  # ── Internal helper ──
 
440
  )
441
  conn.commit()
442
  conn.close()
443
+ except Exception as e:
444
+ logger.warning(f"Failed to log backup event: {e}")
445
+
446
+
447
+ # ═══════════════════════════════════════════════════════
448
+ # DIAGNOSTICS
449
+ # ═══════════════════════════════════════════════════════
450
+
451
def diagnose_hf_setup() -> dict:
    """
    Report on the Hugging Face backup configuration.

    Runs up to four checks, in this order:
      1. HF_TOKEN is set (value shown masked: first 8 and last 4 characters
         when the token is longer than 12 characters, otherwise "***").
      2. HF_BUCKET_REPO is set.
      3. Token validation via HfApi.whoami() (only when a token is set).
      4. Dataset repo lookup via repo_exists() (only when both are set).

    Returns:
        {"checks": [{"name", "status", "value"}, ...], "overall_status": str}.
        Any check whose status contains "❌" counts as failed.
    """
    checks = []

    def record(name, status, value):
        # Append one check row in the shape the template and API consumers read.
        checks.append({"name": name, "status": status, "value": value})

    # Check HF_TOKEN
    if not HF_TOKEN:
        record("HF_TOKEN", "❌ NOT SET", "Environment variable HF_TOKEN is missing")
    else:
        masked_token = (
            f"{HF_TOKEN[:8]}...{HF_TOKEN[-4:]}" if len(HF_TOKEN) > 12 else "***"
        )
        record("HF_TOKEN", "βœ… SET", masked_token)

    # Check HF_BUCKET_REPO
    if not HF_BUCKET_REPO:
        record(
            "HF_BUCKET_REPO",
            "❌ NOT SET",
            "Environment variable HF_BUCKET_REPO is missing",
        )
    else:
        record("HF_BUCKET_REPO", "βœ… SET", HF_BUCKET_REPO)

    # Try to verify token by accessing API
    if HF_TOKEN:
        try:
            account = HfApi(token=HF_TOKEN).whoami()
            record(
                "Token Validation",
                "βœ… VALID",
                f"Logged in as: {account.get('name', 'unknown')}",
            )
        except Exception as exc:
            record("Token Validation", "❌ INVALID", str(exc))

    # Check if repo exists/is accessible
    if HF_TOKEN and HF_BUCKET_REPO:
        try:
            found = repo_exists(
                repo_id=HF_BUCKET_REPO, repo_type="dataset", token=HF_TOKEN
            )
            repo_status = "βœ… EXISTS" if found else "⚠️ WILL BE CREATED"
            record("Repository Check", repo_status, HF_BUCKET_REPO)
        except Exception as exc:
            record("Repository Check", "❌ ERROR", str(exc))

    # Determine overall status
    failure_count = sum(1 for check in checks if "❌" in check["status"])
    if failure_count == 0:
        overall_status = "βœ… All checks passed"
    else:
        overall_status = f"❌ {failure_count} check(s) failed"

    return {"checks": checks, "overall_status": overall_status}
static/admin.css CHANGED
@@ -437,6 +437,49 @@ body {
437
  line-height: 1.5;
438
  }
439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
440
  /* ═══════════════════════════════════════════
441
  Footer
442
  ═══════════════════════════════════════════ */
 
437
  line-height: 1.5;
438
  }
439
 
440
/* ═══════════════════════════════════════════
   Diagnostics
   ═══════════════════════════════════════════ */

/* Vertical stack of diagnostic rows. */
.diagnostics {
    display: flex;
    flex-direction: column;
    gap: 0.5rem;
}

/* One row: fixed-width name and status columns, flexible value column. */
.diag-item {
    display: grid;
    grid-template-columns: 150px 100px 1fr;
    align-items: center;
    gap: 1rem;
    padding: 0.75rem;
    border-radius: 6px;
    background: var(--background);
}

/* Check name (first column). */
.diag-name {
    color: var(--text-muted);
    font-weight: 600;
}

/* Check status (second column). */
.diag-status {
    font-weight: 600;
}

/* Check detail (third column); monospace, wraps anywhere so long values fit. */
.diag-value {
    color: var(--text-muted);
    font-family: 'JetBrains Mono', 'Fira Code', monospace;
    font-size: 0.85rem;
    word-break: break-all;
}

/* Summary line under the rows, separated by a top border. */
.diag-overall {
    margin-top: 1rem;
    padding-top: 1rem;
    border-top: 1px solid var(--border);
    text-align: center;
}
482
+
483
  /* ═══════════════════════════════════════════
484
  Footer
485
  ═══════════════════════════════════════════ */