Upload 21 files
Browse files- app/admin/router.py +22 -1
- app/admin/templates/backups.html +33 -1
- app/backup.py +268 -58
- static/admin.css +43 -0
app/admin/router.py
CHANGED
|
@@ -27,7 +27,7 @@ import secrets
|
|
| 27 |
from app.database import get_db, DB_PATH, integrity_check
|
| 28 |
from app.backup import (
|
| 29 |
create_snapshot, create_compressed_snapshot, create_vacuum_snapshot,
|
| 30 |
-
upload_to_hf_bucket, list_backups, restore_from_hf_bucket
|
| 31 |
)
|
| 32 |
|
| 33 |
router = APIRouter(prefix="/admin", tags=["admin"])
|
|
@@ -165,11 +165,13 @@ async def run_query(request: Request, sql: str = Form(...), _=Depends(verify_adm
|
|
| 165 |
async def backups_page(request: Request, _=Depends(verify_admin)):
|
| 166 |
backup_info = list_backups()
|
| 167 |
health = integrity_check()
|
|
|
|
| 168 |
|
| 169 |
return templates.TemplateResponse("backups.html", {
|
| 170 |
"request": request,
|
| 171 |
"backups": backup_info,
|
| 172 |
"health": health,
|
|
|
|
| 173 |
})
|
| 174 |
|
| 175 |
@router.post("/backups/create")
|
|
@@ -185,6 +187,8 @@ async def create_backup(request: Request, backup_type: str = Form("snapshot"), _
|
|
| 185 |
else:
|
| 186 |
result = {"status": "error", "message": f"Unknown type: {backup_type}"}
|
| 187 |
|
|
|
|
|
|
|
| 188 |
return RedirectResponse(url="/admin/backups", status_code=303)
|
| 189 |
|
| 190 |
@router.get("/backups/download/{filename}")
|
|
@@ -213,3 +217,20 @@ async def export_table(table_name: str, format: str = "json", _=Depends(verify_a
|
|
| 213 |
conn = get_db()
|
| 214 |
rows = conn.execute(f"SELECT * FROM [{table_name}]").fetchall()
|
| 215 |
return [dict(r) for r in rows]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
from app.database import get_db, DB_PATH, integrity_check
|
| 28 |
from app.backup import (
|
| 29 |
create_snapshot, create_compressed_snapshot, create_vacuum_snapshot,
|
| 30 |
+
upload_to_hf_bucket, list_backups, restore_from_hf_bucket, diagnose_hf_setup
|
| 31 |
)
|
| 32 |
|
| 33 |
router = APIRouter(prefix="/admin", tags=["admin"])
|
|
|
|
| 165 |
async def backups_page(request: Request, _=Depends(verify_admin)):
|
| 166 |
backup_info = list_backups()
|
| 167 |
health = integrity_check()
|
| 168 |
+
hf_diagnosis = diagnose_hf_setup()
|
| 169 |
|
| 170 |
return templates.TemplateResponse("backups.html", {
|
| 171 |
"request": request,
|
| 172 |
"backups": backup_info,
|
| 173 |
"health": health,
|
| 174 |
+
"hf_diagnosis": hf_diagnosis,
|
| 175 |
})
|
| 176 |
|
| 177 |
@router.post("/backups/create")
|
|
|
|
| 187 |
else:
|
| 188 |
result = {"status": "error", "message": f"Unknown type: {backup_type}"}
|
| 189 |
|
| 190 |
+
# Store result in session/cookie for display
|
| 191 |
+
# For simplicity, we just redirect - the result is logged
|
| 192 |
return RedirectResponse(url="/admin/backups", status_code=303)
|
| 193 |
|
| 194 |
@router.get("/backups/download/{filename}")
|
|
|
|
| 217 |
conn = get_db()
|
| 218 |
rows = conn.execute(f"SELECT * FROM [{table_name}]").fetchall()
|
| 219 |
return [dict(r) for r in rows]
|
| 220 |
+
|
| 221 |
+
# ββ DIAGNOSTICS ββ
|
| 222 |
+
@router.get("/api/diagnostics")
async def api_diagnostics(_=Depends(verify_admin)):
    """Run HF diagnostics - check credentials and repo access.

    Returns:
        The dict produced by ``diagnose_hf_setup()``.
    """
    import asyncio

    # diagnose_hf_setup() makes synchronous calls to the Hugging Face API;
    # running it on a worker thread keeps the event loop free to serve other
    # requests while those calls are in flight.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, diagnose_hf_setup)
|
| 226 |
+
|
| 227 |
+
@router.post("/api/backup/upload")
async def api_backup_upload(_=Depends(verify_admin)):
    """Trigger HF bucket upload via API.

    Returns:
        The status dict produced by ``upload_to_hf_bucket()``.
    """
    import asyncio

    # upload_to_hf_bucket() is synchronous (snapshot + upload), so it is run
    # on a worker thread instead of blocking the event loop.
    # NOTE(review): assumes the backup helpers open their own DB connections
    # and are safe to call off the main thread - confirm.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(None, upload_to_hf_bucket)
|
| 231 |
+
|
| 232 |
+
@router.post("/api/backup/test")
async def api_backup_test(_=Depends(verify_admin)):
    """Test backup creation and return result.

    Returns:
        The status dict produced by ``create_compressed_snapshot()``.
    """
    import asyncio

    # create_compressed_snapshot() is synchronous, so it is run on a worker
    # thread instead of blocking the event loop.
    # NOTE(review): assumes the snapshot helpers open their own DB
    # connections and are safe to call off the main thread - confirm.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, create_compressed_snapshot)
    return result
|
app/admin/templates/backups.html
CHANGED
|
@@ -7,6 +7,23 @@
|
|
| 7 |
<a href="/admin/dashboard" class="btn-secondary">← Back to Dashboard</a>
|
| 8 |
</div>
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
<!-- Health Status -->
|
| 11 |
<div class="card">
|
| 12 |
<h2>Database Status</h2>
|
|
@@ -53,7 +70,11 @@
|
|
| 53 |
<!-- Local Snapshots -->
|
| 54 |
<div class="card">
|
| 55 |
<h2>π Local Snapshots</h2>
|
| 56 |
-
<p class="info-text">
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
{% if backups.local_snapshots %}
|
| 59 |
<table class="data-table">
|
|
@@ -83,6 +104,17 @@
|
|
| 83 |
{% endif %}
|
| 84 |
</div>
|
| 85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
<!-- Backup Strategy Info -->
|
| 87 |
<div class="card">
|
| 88 |
<h2>π 3-Layer Backup Strategy</h2>
|
|
|
|
| 7 |
<a href="/admin/dashboard" class="btn-secondary">← Back to Dashboard</a>
|
| 8 |
</div>
|
| 9 |
|
| 10 |
+
<!-- HF Diagnostics -->
|
| 11 |
+
<div class="card {% if 'β' in hf_diagnosis.overall_status %}error-card{% endif %}">
|
| 12 |
+
<h2>🔧 HF Setup Diagnostics</h2>
|
| 13 |
+
<div class="diagnostics">
|
| 14 |
+
{% for check in hf_diagnosis.checks %}
|
| 15 |
+
<div class="diag-item">
|
| 16 |
+
<span class="diag-name">{{ check.name }}</span>
|
| 17 |
+
<span class="diag-status">{{ check.status }}</span>
|
| 18 |
+
<span class="diag-value">{{ check.value }}</span>
|
| 19 |
+
</div>
|
| 20 |
+
{% endfor %}
|
| 21 |
+
<div class="diag-overall">
|
| 22 |
+
<strong>{{ hf_diagnosis.overall_status }}</strong>
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
</div>
|
| 26 |
+
|
| 27 |
<!-- Health Status -->
|
| 28 |
<div class="card">
|
| 29 |
<h2>Database Status</h2>
|
|
|
|
| 70 |
<!-- Local Snapshots -->
|
| 71 |
<div class="card">
|
| 72 |
<h2>π Local Snapshots</h2>
|
| 73 |
+
<p class="info-text">
|
| 74 |
+
Directory: <code>{{ backups.snapshot_dir }}</code><br>
|
| 75 |
+
HF Bucket Repo: <code>{{ backups.hf_bucket_repo }}</code><br>
|
| 76 |
+
HF Token: <code>{% if backups.hf_token_set %}✅ Set{% else %}❌ Not Set{% endif %}</code>
|
| 77 |
+
</p>
|
| 78 |
|
| 79 |
{% if backups.local_snapshots %}
|
| 80 |
<table class="data-table">
|
|
|
|
| 104 |
{% endif %}
|
| 105 |
</div>
|
| 106 |
|
| 107 |
+
<!-- Quick API Test -->
|
| 108 |
+
<div class="card">
|
| 109 |
+
<h2>🧪 Quick Test</h2>
|
| 110 |
+
<p class="info-text">Use these API endpoints to test backup functionality:</p>
|
| 111 |
+
<div class="quick-queries">
|
| 112 |
+
<code>GET /admin/api/diagnostics</code> - Check HF setup<br>
|
| 113 |
+
<code>POST /admin/api/backup/test</code> - Test backup creation<br>
|
| 114 |
+
<code>POST /admin/api/backup/upload</code> - Upload to HF Bucket
|
| 115 |
+
</div>
|
| 116 |
+
</div>
|
| 117 |
+
|
| 118 |
<!-- Backup Strategy Info -->
|
| 119 |
<div class="card">
|
| 120 |
<h2>π 3-Layer Backup Strategy</h2>
|
app/backup.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
"""
|
| 2 |
-
Backup Module β 3-Layer Backup Strategy
|
| 3 |
-
ββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
Layer 1: Litestream β local file replica (every 1s, automatic)
|
| 5 |
Layer 2: Python sqlite3.backup() β snapshot to /tmp (on-demand + scheduled)
|
| 6 |
Layer 3: Upload snapshot to HF Bucket (persistent, survives restarts)
|
|
@@ -20,7 +20,7 @@ import shutil
|
|
| 20 |
import logging
|
| 21 |
from datetime import datetime, timedelta
|
| 22 |
from pathlib import Path
|
| 23 |
-
from huggingface_hub import HfApi, hf_hub_download
|
| 24 |
from contextlib import contextmanager
|
| 25 |
|
| 26 |
logger = logging.getLogger(__name__)
|
|
@@ -28,12 +28,16 @@ logger = logging.getLogger(__name__)
|
|
| 28 |
DB_PATH = "/tmp/data/app.db"
|
| 29 |
BACKUP_DIR = "/tmp/data/backups"
|
| 30 |
SNAPSHOT_DIR = "/tmp/data/snapshots"
|
|
|
|
| 31 |
|
| 32 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 33 |
-
HF_BUCKET_REPO = os.environ.get("HF_BUCKET_REPO", "") # "username/
|
| 34 |
|
|
|
|
| 35 |
os.makedirs(BACKUP_DIR, exist_ok=True)
|
| 36 |
os.makedirs(SNAPSHOT_DIR, exist_ok=True)
|
|
|
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -50,7 +54,10 @@ def create_snapshot(label: str = "manual") -> dict:
|
|
| 50 |
Source: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup
|
| 51 |
"""
|
| 52 |
if not os.path.exists(DB_PATH):
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 56 |
snapshot_name = f"app_{label}_{timestamp}.db"
|
|
@@ -94,29 +101,53 @@ def create_compressed_snapshot() -> dict:
|
|
| 94 |
|
| 95 |
Source: https://litestream.io/alternatives/cron/
|
| 96 |
"""
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
if result["status"] != "success":
|
| 99 |
return result
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
with open(snapshot_path, 'rb') as f_in:
|
| 105 |
-
with gzip.open(gz_path, 'wb') as f_out:
|
| 106 |
-
shutil.copyfileobj(f_in, f_out)
|
| 107 |
-
|
| 108 |
-
# Remove uncompressed version
|
| 109 |
-
os.remove(snapshot_path)
|
| 110 |
-
|
| 111 |
-
gz_size = os.path.getsize(gz_path)
|
| 112 |
-
result["compressed_path"] = gz_path
|
| 113 |
-
result["compressed_size_bytes"] = gz_size
|
| 114 |
-
result["compressed_size_mb"] = round(gz_size / (1024 * 1024), 2)
|
| 115 |
-
result["compression_ratio"] = round(
|
| 116 |
-
(1 - gz_size / result["size_bytes"]) * 100, 1
|
| 117 |
-
)
|
| 118 |
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
|
| 122 |
def create_vacuum_snapshot() -> dict:
|
|
@@ -158,6 +189,47 @@ def create_vacuum_snapshot() -> dict:
|
|
| 158 |
# LAYER 3: HF Bucket Sync β Persistent Remote Storage
|
| 159 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
def upload_to_hf_bucket(local_path: str = None) -> dict:
|
| 162 |
"""
|
| 163 |
Upload a database snapshot to HuggingFace Bucket.
|
|
@@ -166,31 +238,46 @@ def upload_to_hf_bucket(local_path: str = None) -> dict:
|
|
| 166 |
|
| 167 |
Source: https://huggingface.co/docs/huggingface_hub/guides/upload
|
| 168 |
"""
|
|
|
|
| 169 |
if not HF_TOKEN or not HF_BUCKET_REPO:
|
| 170 |
-
|
|
|
|
|
|
|
| 171 |
|
| 172 |
-
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
# Create snapshot if not provided
|
| 175 |
if local_path is None:
|
| 176 |
result = create_compressed_snapshot()
|
| 177 |
if result["status"] != "success":
|
| 178 |
return result
|
| 179 |
-
local_path = result
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
filename = os.path.basename(local_path)
|
| 182 |
-
|
| 183 |
|
| 184 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
# Upload latest (overwrites)
|
| 186 |
api.upload_file(
|
| 187 |
path_or_fileobj=local_path,
|
| 188 |
-
path_in_repo=
|
| 189 |
repo_id=HF_BUCKET_REPO,
|
| 190 |
-
repo_type="dataset",
|
| 191 |
)
|
| 192 |
|
| 193 |
-
# Also keep a timestamped copy (rolling
|
|
|
|
| 194 |
api.upload_file(
|
| 195 |
path_or_fileobj=local_path,
|
| 196 |
path_in_repo=f"db-backups/archive/{filename}",
|
|
@@ -198,16 +285,16 @@ def upload_to_hf_bucket(local_path: str = None) -> dict:
|
|
| 198 |
repo_type="dataset",
|
| 199 |
)
|
| 200 |
|
| 201 |
-
file_size = os.path.getsize(local_path)
|
| 202 |
_log_backup("hf_bucket", f"db-backups/{filename}", file_size, "success")
|
| 203 |
|
| 204 |
logger.info(f"β
Uploaded to HF Bucket: {filename}")
|
| 205 |
-
return {"status": "success", "remote_path": f"db-backups/{filename}"}
|
| 206 |
|
| 207 |
except Exception as e:
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
|
|
|
| 211 |
|
| 212 |
|
| 213 |
def restore_from_hf_bucket() -> dict:
|
|
@@ -219,26 +306,31 @@ def restore_from_hf_bucket() -> dict:
|
|
| 219 |
"""
|
| 220 |
if not HF_TOKEN or not HF_BUCKET_REPO:
|
| 221 |
logger.warning("β οΈ HF credentials not set, skipping restore")
|
| 222 |
-
return {"status": "skipped", "message": "No HF credentials"}
|
| 223 |
|
| 224 |
-
if os.path.exists(DB_PATH):
|
| 225 |
logger.info("β
Database already exists, skipping restore")
|
| 226 |
return {"status": "skipped", "message": "DB already exists"}
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
try:
|
| 229 |
-
|
| 230 |
|
| 231 |
# Download latest backup
|
| 232 |
local_gz = hf_hub_download(
|
| 233 |
repo_id=HF_BUCKET_REPO,
|
| 234 |
filename="db-backups/latest.db.gz",
|
| 235 |
repo_type="dataset",
|
| 236 |
-
local_dir=
|
| 237 |
token=HF_TOKEN,
|
| 238 |
)
|
| 239 |
|
|
|
|
|
|
|
| 240 |
# Decompress
|
| 241 |
-
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
|
| 242 |
with gzip.open(local_gz, 'rb') as f_in:
|
| 243 |
with open(DB_PATH, 'wb') as f_out:
|
| 244 |
shutil.copyfileobj(f_in, f_out)
|
|
@@ -257,8 +349,14 @@ def restore_from_hf_bucket() -> dict:
|
|
| 257 |
return {"status": "error", "message": f"Integrity check failed: {result[0]}"}
|
| 258 |
|
| 259 |
except Exception as e:
|
| 260 |
-
|
| 261 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
|
| 263 |
|
| 264 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
@@ -271,22 +369,41 @@ async def scheduled_backup_task():
|
|
| 271 |
Creates snapshot + uploads to HF Bucket.
|
| 272 |
"""
|
| 273 |
import asyncio
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
while True:
|
| 275 |
-
await asyncio.sleep(1800) # 30 minutes
|
| 276 |
try:
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
cleanup_old_snapshots(max_age_hours=48)
|
| 279 |
except Exception as e:
|
| 280 |
-
logger.error(f"Scheduled backup error: {e}")
|
|
|
|
|
|
|
| 281 |
|
| 282 |
|
| 283 |
def cleanup_old_snapshots(max_age_hours: int = 48):
|
| 284 |
"""Remove local snapshots older than max_age_hours."""
|
| 285 |
cutoff = datetime.now() - timedelta(hours=max_age_hours)
|
|
|
|
| 286 |
for f in Path(SNAPSHOT_DIR).glob("*.db*"):
|
| 287 |
-
|
| 288 |
-
f.
|
| 289 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
|
| 291 |
|
| 292 |
def list_backups() -> dict:
|
|
@@ -294,14 +411,22 @@ def list_backups() -> dict:
|
|
| 294 |
local_files = []
|
| 295 |
|
| 296 |
for f in sorted(Path(SNAPSHOT_DIR).glob("*.db*"), key=lambda x: x.stat().st_mtime, reverse=True):
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
|
|
|
|
|
|
|
|
|
| 303 |
|
| 304 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 305 |
|
| 306 |
|
| 307 |
# ββ Internal helper ββ
|
|
@@ -315,5 +440,90 @@ def _log_backup(backup_type, file_path, file_size, status, error=None):
|
|
| 315 |
)
|
| 316 |
conn.commit()
|
| 317 |
conn.close()
|
| 318 |
-
except:
|
| 319 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
Backup Module β 3-Layer Backup Strategy (FIXED)
|
| 3 |
+
ββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 4 |
Layer 1: Litestream β local file replica (every 1s, automatic)
|
| 5 |
Layer 2: Python sqlite3.backup() β snapshot to /tmp (on-demand + scheduled)
|
| 6 |
Layer 3: Upload snapshot to HF Bucket (persistent, survives restarts)
|
|
|
|
| 20 |
import logging
|
| 21 |
from datetime import datetime, timedelta
|
| 22 |
from pathlib import Path
|
| 23 |
+
from huggingface_hub import HfApi, hf_hub_download, create_repo, repo_exists
|
| 24 |
from contextlib import contextmanager
|
| 25 |
|
| 26 |
logger = logging.getLogger(__name__)
|
|
|
|
| 28 |
DB_PATH = "/tmp/data/app.db"
|
| 29 |
BACKUP_DIR = "/tmp/data/backups"
|
| 30 |
SNAPSHOT_DIR = "/tmp/data/snapshots"
|
| 31 |
+
RESTORE_DIR = "/tmp/data/restore"
|
| 32 |
|
| 33 |
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 34 |
+
HF_BUCKET_REPO = os.environ.get("HF_BUCKET_REPO", "") # "username/repo-name"
|
| 35 |
|
| 36 |
+
# Ensure all directories exist
|
| 37 |
os.makedirs(BACKUP_DIR, exist_ok=True)
|
| 38 |
os.makedirs(SNAPSHOT_DIR, exist_ok=True)
|
| 39 |
+
os.makedirs(RESTORE_DIR, exist_ok=True)
|
| 40 |
+
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
|
| 41 |
|
| 42 |
|
| 43 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 54 |
Source: https://docs.python.org/3/library/sqlite3.html#sqlite3.Connection.backup
|
| 55 |
"""
|
| 56 |
if not os.path.exists(DB_PATH):
|
| 57 |
+
logger.warning(f"Database not found at {DB_PATH}, creating empty database first")
|
| 58 |
+
# Create empty database with schema
|
| 59 |
+
from app.database import init_db
|
| 60 |
+
init_db()
|
| 61 |
|
| 62 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 63 |
snapshot_name = f"app_{label}_{timestamp}.db"
|
|
|
|
| 101 |
|
| 102 |
Source: https://litestream.io/alternatives/cron/
|
| 103 |
"""
|
| 104 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 105 |
+
snapshot_name = f"app_compressed_{timestamp}.db.gz"
|
| 106 |
+
gz_path = os.path.join(SNAPSHOT_DIR, snapshot_name)
|
| 107 |
+
|
| 108 |
+
# Create uncompressed snapshot first (temp)
|
| 109 |
+
result = create_snapshot(label="temp_for_compress")
|
| 110 |
if result["status"] != "success":
|
| 111 |
return result
|
| 112 |
|
| 113 |
+
temp_snapshot_path = result["path"]
|
| 114 |
+
original_size = result["size_bytes"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
+
try:
|
| 117 |
+
# Compress directly to final location
|
| 118 |
+
with open(temp_snapshot_path, 'rb') as f_in:
|
| 119 |
+
with gzip.open(gz_path, 'wb') as f_out:
|
| 120 |
+
shutil.copyfileobj(f_in, f_out)
|
| 121 |
+
|
| 122 |
+
# Remove temp uncompressed file
|
| 123 |
+
if os.path.exists(temp_snapshot_path):
|
| 124 |
+
os.remove(temp_snapshot_path)
|
| 125 |
+
|
| 126 |
+
gz_size = os.path.getsize(gz_path)
|
| 127 |
+
|
| 128 |
+
_log_backup("compressed_snapshot", gz_path, gz_size, "success")
|
| 129 |
+
|
| 130 |
+
logger.info(f"β
Compressed snapshot created: {snapshot_name} ({gz_size} bytes, {round((1 - gz_size/original_size)*100, 1)}% reduction)")
|
| 131 |
+
|
| 132 |
+
return {
|
| 133 |
+
"status": "success",
|
| 134 |
+
"file": snapshot_name,
|
| 135 |
+
"path": gz_path, # This is now the .gz file
|
| 136 |
+
"size_bytes": original_size,
|
| 137 |
+
"size_mb": round(original_size / (1024 * 1024), 2),
|
| 138 |
+
"compressed_size_bytes": gz_size,
|
| 139 |
+
"compressed_size_mb": round(gz_size / (1024 * 1024), 2),
|
| 140 |
+
"compression_ratio": round((1 - gz_size / original_size) * 100, 1),
|
| 141 |
+
}
|
| 142 |
+
except Exception as e:
|
| 143 |
+
# Clean up on failure
|
| 144 |
+
if os.path.exists(temp_snapshot_path):
|
| 145 |
+
os.remove(temp_snapshot_path)
|
| 146 |
+
if os.path.exists(gz_path):
|
| 147 |
+
os.remove(gz_path)
|
| 148 |
+
_log_backup("compressed_snapshot", gz_path, 0, "error", str(e))
|
| 149 |
+
logger.error(f"β Compressed snapshot failed: {e}")
|
| 150 |
+
return {"status": "error", "message": str(e)}
|
| 151 |
|
| 152 |
|
| 153 |
def create_vacuum_snapshot() -> dict:
|
|
|
|
| 189 |
# LAYER 3: HF Bucket Sync β Persistent Remote Storage
|
| 190 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 191 |
|
| 192 |
+
def ensure_hf_repo_exists() -> dict:
    """
    Ensure the HF dataset repository exists.
    Creates it (as a private dataset repo) if it doesn't exist.

    Returns:
        ``{"status": "success", "message": ...}`` when the repo already
        exists or was created, otherwise
        ``{"status": "error", "message": ...}`` (missing configuration or
        a failure while creating the repo).
    """
    if not HF_TOKEN:
        return {"status": "error", "message": "HF_TOKEN not set"}

    if not HF_BUCKET_REPO:
        return {"status": "error", "message": "HF_BUCKET_REPO not set"}

    try:
        # The existence probe is best-effort: if it fails we still try to
        # create the repo (exist_ok=True makes that safe), but the failure
        # is logged rather than silently discarded.
        try:
            already_exists = repo_exists(
                repo_id=HF_BUCKET_REPO, repo_type="dataset", token=HF_TOKEN
            )
        except Exception as probe_error:
            logger.warning(
                f"Could not check whether HF repo exists ({probe_error}); "
                "attempting to create it"
            )
            already_exists = False

        if already_exists:
            logger.info(f"✅ HF repo exists: {HF_BUCKET_REPO}")
            return {"status": "success", "message": "Repo exists"}

        # Create repo if it doesn't exist
        logger.info(f"Creating HF dataset repo: {HF_BUCKET_REPO}")
        create_repo(
            repo_id=HF_BUCKET_REPO,
            repo_type="dataset",
            private=True,
            exist_ok=True,
            token=HF_TOKEN,
        )
        logger.info(f"✅ Created HF dataset repo: {HF_BUCKET_REPO}")
        return {"status": "success", "message": "Repo created"}

    except Exception as e:
        logger.error(f"❌ Failed to ensure HF repo exists: {e}")
        return {"status": "error", "message": str(e)}
|
| 231 |
+
|
| 232 |
+
|
| 233 |
def upload_to_hf_bucket(local_path: str = None) -> dict:
|
| 234 |
"""
|
| 235 |
Upload a database snapshot to HuggingFace Bucket.
|
|
|
|
| 238 |
|
| 239 |
Source: https://huggingface.co/docs/huggingface_hub/guides/upload
|
| 240 |
"""
|
| 241 |
+
# Check credentials
|
| 242 |
if not HF_TOKEN or not HF_BUCKET_REPO:
|
| 243 |
+
msg = f"HF_TOKEN={'set' if HF_TOKEN else 'NOT SET'}, HF_BUCKET_REPO={'set' if HF_BUCKET_REPO else 'NOT SET'}"
|
| 244 |
+
logger.error(f"β HF credentials missing: {msg}")
|
| 245 |
+
return {"status": "error", "message": f"Credentials not configured: {msg}"}
|
| 246 |
|
| 247 |
+
# Ensure repo exists
|
| 248 |
+
repo_status = ensure_hf_repo_exists()
|
| 249 |
+
if repo_status["status"] != "success":
|
| 250 |
+
return repo_status
|
| 251 |
|
| 252 |
# Create snapshot if not provided
|
| 253 |
if local_path is None:
|
| 254 |
result = create_compressed_snapshot()
|
| 255 |
if result["status"] != "success":
|
| 256 |
return result
|
| 257 |
+
local_path = result["path"] # Now correctly points to .gz file
|
| 258 |
+
|
| 259 |
+
# Verify file exists
|
| 260 |
+
if not os.path.exists(local_path):
|
| 261 |
+
return {"status": "error", "message": f"Local file not found: {local_path}"}
|
| 262 |
|
| 263 |
filename = os.path.basename(local_path)
|
| 264 |
+
file_size = os.path.getsize(local_path)
|
| 265 |
|
| 266 |
try:
|
| 267 |
+
api = HfApi(token=HF_TOKEN)
|
| 268 |
+
|
| 269 |
+
logger.info(f"π€ Uploading to HF Bucket: {filename} ({file_size} bytes)")
|
| 270 |
+
|
| 271 |
# Upload latest (overwrites)
|
| 272 |
api.upload_file(
|
| 273 |
path_or_fileobj=local_path,
|
| 274 |
+
path_in_repo="db-backups/latest.db.gz",
|
| 275 |
repo_id=HF_BUCKET_REPO,
|
| 276 |
+
repo_type="dataset",
|
| 277 |
)
|
| 278 |
|
| 279 |
+
# Also keep a timestamped copy (rolling archive)
|
| 280 |
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 281 |
api.upload_file(
|
| 282 |
path_or_fileobj=local_path,
|
| 283 |
path_in_repo=f"db-backups/archive/{filename}",
|
|
|
|
| 285 |
repo_type="dataset",
|
| 286 |
)
|
| 287 |
|
|
|
|
| 288 |
_log_backup("hf_bucket", f"db-backups/{filename}", file_size, "success")
|
| 289 |
|
| 290 |
logger.info(f"β
Uploaded to HF Bucket: {filename}")
|
| 291 |
+
return {"status": "success", "remote_path": f"db-backups/{filename}", "size_bytes": file_size}
|
| 292 |
|
| 293 |
except Exception as e:
|
| 294 |
+
error_msg = str(e)
|
| 295 |
+
_log_backup("hf_bucket", local_path, 0, "error", error_msg)
|
| 296 |
+
logger.error(f"β HF Bucket upload failed: {error_msg}")
|
| 297 |
+
return {"status": "error", "message": error_msg}
|
| 298 |
|
| 299 |
|
| 300 |
def restore_from_hf_bucket() -> dict:
|
|
|
|
| 306 |
"""
|
| 307 |
if not HF_TOKEN or not HF_BUCKET_REPO:
|
| 308 |
logger.warning("β οΈ HF credentials not set, skipping restore")
|
| 309 |
+
return {"status": "skipped", "message": "No HF credentials configured"}
|
| 310 |
|
| 311 |
+
if os.path.exists(DB_PATH) and os.path.getsize(DB_PATH) > 0:
|
| 312 |
logger.info("β
Database already exists, skipping restore")
|
| 313 |
return {"status": "skipped", "message": "DB already exists"}
|
| 314 |
|
| 315 |
+
# Ensure restore directory exists
|
| 316 |
+
os.makedirs(RESTORE_DIR, exist_ok=True)
|
| 317 |
+
os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
|
| 318 |
+
|
| 319 |
try:
|
| 320 |
+
logger.info(f"π₯ Attempting to restore from HF Bucket: {HF_BUCKET_REPO}")
|
| 321 |
|
| 322 |
# Download latest backup
|
| 323 |
local_gz = hf_hub_download(
|
| 324 |
repo_id=HF_BUCKET_REPO,
|
| 325 |
filename="db-backups/latest.db.gz",
|
| 326 |
repo_type="dataset",
|
| 327 |
+
local_dir=RESTORE_DIR,
|
| 328 |
token=HF_TOKEN,
|
| 329 |
)
|
| 330 |
|
| 331 |
+
logger.info(f"β
Downloaded backup: {local_gz}")
|
| 332 |
+
|
| 333 |
# Decompress
|
|
|
|
| 334 |
with gzip.open(local_gz, 'rb') as f_in:
|
| 335 |
with open(DB_PATH, 'wb') as f_out:
|
| 336 |
shutil.copyfileobj(f_in, f_out)
|
|
|
|
| 349 |
return {"status": "error", "message": f"Integrity check failed: {result[0]}"}
|
| 350 |
|
| 351 |
except Exception as e:
|
| 352 |
+
error_msg = str(e)
|
| 353 |
+
# Check if it's a "file not found" error (first run)
|
| 354 |
+
if "404" in error_msg or "not found" in error_msg.lower():
|
| 355 |
+
logger.info("π¦ No backup found in HF Bucket (this is normal for first run)")
|
| 356 |
+
return {"status": "not_found", "message": "No backup exists yet - this is normal for first run"}
|
| 357 |
+
else:
|
| 358 |
+
logger.error(f"β Restore from HF Bucket failed: {error_msg}")
|
| 359 |
+
return {"status": "error", "message": error_msg}
|
| 360 |
|
| 361 |
|
| 362 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 369 |
Creates snapshot + uploads to HF Bucket.
|
| 370 |
"""
|
| 371 |
import asyncio
|
| 372 |
+
|
| 373 |
+
# Wait a bit before first backup (let app stabilize)
|
| 374 |
+
await asyncio.sleep(60)
|
| 375 |
+
|
| 376 |
while True:
|
|
|
|
| 377 |
try:
|
| 378 |
+
logger.info("β° Running scheduled backup...")
|
| 379 |
+
result = upload_to_hf_bucket()
|
| 380 |
+
if result["status"] == "success":
|
| 381 |
+
logger.info("β
Scheduled backup completed successfully")
|
| 382 |
+
else:
|
| 383 |
+
logger.warning(f"β οΈ Scheduled backup completed with issues: {result.get('message', 'unknown')}")
|
| 384 |
+
|
| 385 |
cleanup_old_snapshots(max_age_hours=48)
|
| 386 |
except Exception as e:
|
| 387 |
+
logger.error(f"β Scheduled backup error: {e}")
|
| 388 |
+
|
| 389 |
+
await asyncio.sleep(1800) # 30 minutes
|
| 390 |
|
| 391 |
|
| 392 |
def cleanup_old_snapshots(max_age_hours: int = 48):
    """Remove local snapshots older than max_age_hours.

    Scans SNAPSHOT_DIR for files matching ``*.db*`` and deletes those whose
    modification time is older than the cutoff. A failure on one file is
    logged and does not stop the sweep.

    Args:
        max_age_hours: Maximum age, in hours, a snapshot may have before it
            is deleted.

    Returns:
        The number of snapshot files that were removed.
    """
    import time

    # Compare raw epoch seconds: naive local-time datetime arithmetic is off
    # by an hour whenever a DST transition falls inside the window.
    cutoff_ts = time.time() - max_age_hours * 3600
    cleaned = 0
    for f in Path(SNAPSHOT_DIR).glob("*.db*"):
        try:
            if f.stat().st_mtime < cutoff_ts:
                f.unlink()
                cleaned += 1
                logger.info(f"🗑️ Removed old snapshot: {f.name}")
        except OSError as e:
            # Best-effort cleanup: e.g. the file vanished or is not writable.
            logger.warning(f"Failed to remove {f.name}: {e}")

    if cleaned > 0:
        logger.info(f"🗑️ Cleaned up {cleaned} old snapshots")
    return cleaned
|
| 407 |
|
| 408 |
|
| 409 |
def list_backups() -> dict:
|
|
|
|
| 411 |
local_files = []
|
| 412 |
|
| 413 |
for f in sorted(Path(SNAPSHOT_DIR).glob("*.db*"), key=lambda x: x.stat().st_mtime, reverse=True):
|
| 414 |
+
try:
|
| 415 |
+
local_files.append({
|
| 416 |
+
"name": f.name,
|
| 417 |
+
"size_bytes": f.stat().st_size,
|
| 418 |
+
"size_mb": round(f.stat().st_size / (1024 * 1024), 2),
|
| 419 |
+
"modified": datetime.fromtimestamp(f.stat().st_mtime).isoformat(),
|
| 420 |
+
})
|
| 421 |
+
except Exception:
|
| 422 |
+
pass
|
| 423 |
|
| 424 |
+
return {
|
| 425 |
+
"local_snapshots": local_files,
|
| 426 |
+
"snapshot_dir": SNAPSHOT_DIR,
|
| 427 |
+
"hf_bucket_repo": HF_BUCKET_REPO if HF_BUCKET_REPO else "Not configured",
|
| 428 |
+
"hf_token_set": bool(HF_TOKEN),
|
| 429 |
+
}
|
| 430 |
|
| 431 |
|
| 432 |
# ββ Internal helper ββ
|
|
|
|
| 440 |
)
|
| 441 |
conn.commit()
|
| 442 |
conn.close()
|
| 443 |
+
except Exception as e:
|
| 444 |
+
logger.warning(f"Failed to log backup event: {e}")
|
| 445 |
+
|
| 446 |
+
|
| 447 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 448 |
+
# DIAGNOSTICS
|
| 449 |
+
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 450 |
+
|
| 451 |
+
def diagnose_hf_setup() -> dict:
    """
    Diagnose HF setup issues.
    Run this to check if everything is configured correctly.

    Checks performed, in order:
      1. HF_TOKEN is set.
      2. HF_BUCKET_REPO is set.
      3. Token validation via ``HfApi.whoami()`` (only when a token is set).
      4. Dataset repo existence via ``repo_exists`` (only when both the
         token and the repo name are set).

    Returns:
        ``{"checks": [{"name", "status", "value"}, ...],
        "overall_status": str}``. A check counts as failed when its status
        contains the failure marker; a missing repo is reported as a
        warning, not a failure.
    """
    # Distinct markers: the overall verdict is derived by looking for the
    # failure marker inside each status string, so it must not be a
    # substring of the OK or warning markers.
    ok_mark = "✅"
    warn_mark = "⚠️"
    fail_mark = "❌"

    checks = []

    def add_check(name, status, value):
        """Append one check entry in the shape the admin template expects."""
        checks.append({"name": name, "status": status, "value": value})

    # Check HF_TOKEN
    if HF_TOKEN:
        # NOTE(review): this preview exposes the first 8 and last 4
        # characters of the secret to whoever can see the diagnostics output.
        preview = f"{HF_TOKEN[:8]}...{HF_TOKEN[-4:]}" if len(HF_TOKEN) > 12 else "***"
        add_check("HF_TOKEN", f"{ok_mark} SET", preview)
    else:
        add_check(
            "HF_TOKEN",
            f"{fail_mark} NOT SET",
            "Environment variable HF_TOKEN is missing",
        )

    # Check HF_BUCKET_REPO
    if HF_BUCKET_REPO:
        add_check("HF_BUCKET_REPO", f"{ok_mark} SET", HF_BUCKET_REPO)
    else:
        add_check(
            "HF_BUCKET_REPO",
            f"{fail_mark} NOT SET",
            "Environment variable HF_BUCKET_REPO is missing",
        )

    # Try to verify token by accessing API
    if HF_TOKEN:
        try:
            api = HfApi(token=HF_TOKEN)
            user_info = api.whoami()
            add_check(
                "Token Validation",
                f"{ok_mark} VALID",
                f"Logged in as: {user_info.get('name', 'unknown')}",
            )
        except Exception as e:
            add_check("Token Validation", f"{fail_mark} INVALID", str(e))

    # Check if repo exists/is accessible
    if HF_TOKEN and HF_BUCKET_REPO:
        try:
            exists = repo_exists(repo_id=HF_BUCKET_REPO, repo_type="dataset", token=HF_TOKEN)
            add_check(
                "Repository Check",
                f"{ok_mark} EXISTS" if exists else f"{warn_mark} WILL BE CREATED",
                HF_BUCKET_REPO,
            )
        except Exception as e:
            add_check("Repository Check", f"{fail_mark} ERROR", str(e))

    # Determine overall status
    failed = [c for c in checks if fail_mark in c["status"]]
    if not failed:
        overall_status = f"{ok_mark} All checks passed"
    else:
        overall_status = f"{fail_mark} {len(failed)} check(s) failed"

    return {"checks": checks, "overall_status": overall_status}
|
static/admin.css
CHANGED
|
@@ -437,6 +437,49 @@ body {
|
|
| 437 |
line-height: 1.5;
|
| 438 |
}
|
| 439 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 440 |
/* βββββββββββββββββββββββββββββββββββββββββββ
|
| 441 |
Footer
|
| 442 |
βββββββββββββββββββββββββββββββββββββββββββ */
|
|
|
|
| 437 |
line-height: 1.5;
|
| 438 |
}
|
| 439 |
|
| 440 |
+
/* βββββββββββββββββββββββββββββββββββββββββββ
|
| 441 |
+
Diagnostics
|
| 442 |
+
βββββββββββββββββββββββββββββββββββββββββββ */
|
| 443 |
+
|
| 444 |
+
.diagnostics {
|
| 445 |
+
display: flex;
|
| 446 |
+
flex-direction: column;
|
| 447 |
+
gap: 0.5rem;
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
.diag-item {
|
| 451 |
+
display: grid;
|
| 452 |
+
grid-template-columns: 150px 100px 1fr;
|
| 453 |
+
gap: 1rem;
|
| 454 |
+
padding: 0.75rem;
|
| 455 |
+
background: var(--background);
|
| 456 |
+
border-radius: 6px;
|
| 457 |
+
align-items: center;
|
| 458 |
+
}
|
| 459 |
+
|
| 460 |
+
.diag-name {
|
| 461 |
+
font-weight: 600;
|
| 462 |
+
color: var(--text-muted);
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
.diag-status {
|
| 466 |
+
font-weight: 600;
|
| 467 |
+
}
|
| 468 |
+
|
| 469 |
+
.diag-value {
|
| 470 |
+
font-family: 'JetBrains Mono', 'Fira Code', monospace;
|
| 471 |
+
font-size: 0.85rem;
|
| 472 |
+
color: var(--text-muted);
|
| 473 |
+
word-break: break-all;
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
.diag-overall {
|
| 477 |
+
margin-top: 1rem;
|
| 478 |
+
padding-top: 1rem;
|
| 479 |
+
border-top: 1px solid var(--border);
|
| 480 |
+
text-align: center;
|
| 481 |
+
}
|
| 482 |
+
|
| 483 |
/* βββββββββββββββββββββββββββββββββββββββββββ
|
| 484 |
Footer
|
| 485 |
βββββββββββββββββββββββββββββββββββββββββββ */
|