Thomas Gobin
backup implementation
39b5c97
#!/usr/bin/env python3
import os
import sys
import subprocess
import datetime
import sqlite3
from pathlib import Path
from io import BytesIO
from huggingface_hub import HfApi, HfFileSystem, hf_hub_download
import tempfile
# Make the backend directory importable: SCRIPT_DIR is the resolved directory
# containing this script and BACKEND_DIR is its parent, which is appended to
# sys.path so the application module located there can be imported.
SCRIPT_DIR = Path(__file__).parent.resolve()
BACKEND_DIR = SCRIPT_DIR.parent
sys.path.append(str(BACKEND_DIR))
# Location of the live application database. DATA_DIR may be overridden through
# the DATA_DIR environment variable; the database file itself is always named
# "webui.db" inside that directory.
DATA_DIR = os.environ.get("DATA_DIR", "/app/backend/data")
DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")
# Paths inside the Hugging Face repo (not local filesystem paths):
# the GPG-encrypted database and a text file holding the time of the last backup.
REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
def verify_database():
    """Check that the application database exists, is intact and has tables.

    Runs SQLite's ``PRAGMA integrity_check`` against ``DB_FILE_PATH`` and
    lists the tables recorded in ``sqlite_master``.

    Returns:
        bool: True when the integrity check reports "ok" and at least one
        table exists. False when the file is missing, the check fails, the
        database contains no tables, or any error occurs while reading it
        (the reason is printed).
    """
    if not os.path.exists(DB_FILE_PATH):
        print(f"Database file not found at: {DB_FILE_PATH}")
        return False
    try:
        conn = sqlite3.connect(DB_FILE_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("PRAGMA integrity_check;")
            result = cursor.fetchone()[0]
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
        finally:
            # Using the connection as a context manager only commits or rolls
            # back; it does not close the handle, so close it explicitly.
            conn.close()
        if result.lower() == "ok" and tables:
            print(f"Database verified: {len(tables)} tables found")
            return True
        print("Database verification failed")
        return False
    except Exception as e:
        print(f"Database verification error: {e}")
        return False
def encrypt_database_to_memory(passphrase):
    """Encrypt the database file with GPG and return the ciphertext as bytes.

    Runs ``gpg -c`` (symmetric encryption, AES256) on ``DB_FILE_PATH`` using a
    throwaway GPG home directory, and captures the ciphertext from stdout so
    nothing encrypted is written to local disk.

    Args:
        passphrase: Passphrase used for the symmetric encryption.

    Returns:
        bytes | None: The encrypted data, or None when GPG could not be run
        or exited with a non-zero status (the error is printed).
    """
    try:
        raw_passphrase = os.fsencode(passphrase)
        if b"\n" in raw_passphrase:
            # gpg only reads the first line from --passphrase-fd, so a
            # passphrase with an embedded newline cannot be piped without
            # changing the effective key; keep the argv form for that case.
            passphrase_args = ["--passphrase", passphrase]
            stdin_data = None
        else:
            # Feed the passphrase through stdin instead of argv so it is not
            # visible to other users in the process list.
            passphrase_args = ["--passphrase-fd", "0"]
            stdin_data = raw_passphrase + b"\n"

        # Create a private temporary home so gpg does not touch (or depend
        # on) any existing keyring/configuration.
        with tempfile.TemporaryDirectory(prefix='gpg_home_') as gpg_home:
            os.chmod(gpg_home, 0o700)
            encrypt_cmd = [
                "gpg",
                "--batch",
                "--yes",
                "--homedir", gpg_home,
                "--pinentry-mode", "loopback",
                *passphrase_args,
                "-c",
                "--cipher-algo", "AES256",
                "-o", "-",  # Output to stdout
                DB_FILE_PATH,
            ]
            # check=True raises CalledProcessError on a non-zero exit status,
            # so reaching the return means gpg succeeded.
            result = subprocess.run(
                encrypt_cmd,
                input=stdin_data,
                capture_output=True,
                check=True,
            )
            return result.stdout
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes; never let a decoding problem mask the error.
        print(f"GPG process error: {e.stderr.decode(errors='replace')}")
        return None
    except Exception as e:
        print(f"Encryption error: {e}")
        return None
def get_last_backup_time(repo_id, hf_token):
    """Return when the last backup was recorded in the Hugging Face repo.

    Looks for ``REPO_TIMESTAMP_FILE`` in the Space repo, downloads it and
    parses its content as an ISO-8601 timestamp.

    Args:
        repo_id: Identifier of the Space repo to inspect.
        hf_token: Token used to authenticate against the Hugging Face Hub.

    Returns:
        datetime.datetime | None: The parsed timestamp, or None when the
        timestamp file is absent or anything goes wrong while fetching or
        parsing it (errors are printed).
    """
    try:
        repo_files = HfApi().list_repo_files(
            repo_id=repo_id, repo_type="space", token=hf_token
        )
        if REPO_TIMESTAMP_FILE in repo_files:
            local_copy = hf_hub_download(
                repo_id=repo_id,
                repo_type="space",
                filename=REPO_TIMESTAMP_FILE,
                token=hf_token,
            )
            raw_stamp = Path(local_copy).read_text(encoding="utf-8")
            return datetime.datetime.fromisoformat(raw_stamp.strip())
        # No timestamp file yet: no backup has been recorded.
        return None
    except Exception as exc:
        print(f"Error getting last backup time: {exc}")
        return None
def backup_db():
    """Encrypt the application database and upload it to the Hugging Face Space.

    Configuration is read from environment variables:

    * ``BACKUP_PASSPHRASE``, ``HF_TOKEN``, ``SPACE_ID`` -- required.
    * ``BACKUP_THRESHOLD_MINUTES`` -- optional integer (default 120). When
      positive, the backup is skipped if the last recorded backup is more
      recent than this many minutes; zero or a negative value disables the
      check.

    Returns:
        bool: True when a backup was uploaded, or skipped because the last
        one is recent enough. False when configuration is missing or
        invalid, the database fails verification, encryption fails, or the
        upload raises.
    """
    # Validate environment
    passphrase = os.environ.get("BACKUP_PASSPHRASE")
    hf_token = os.environ.get("HF_TOKEN")
    space_id = os.environ.get("SPACE_ID")
    if not all([passphrase, hf_token, space_id]):
        print("Error: Missing required environment variables")
        return False

    # Check backup threshold. A malformed value is reported as a
    # configuration error instead of crashing with a traceback.
    raw_threshold = os.environ.get("BACKUP_THRESHOLD_MINUTES", 120)
    try:
        threshold_minutes = int(raw_threshold)
    except ValueError:
        print(
            "Error: BACKUP_THRESHOLD_MINUTES must be an integer, "
            f"got {raw_threshold!r}"
        )
        return False

    if threshold_minutes > 0:
        last_backup_dt = get_last_backup_time(space_id, hf_token)
        if last_backup_dt is not None:
            now = datetime.datetime.now(datetime.timezone.utc)
            # A timestamp stored without an offset is treated as UTC so it
            # can be compared with the timezone-aware "now".
            if not last_backup_dt.tzinfo:
                last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
            elapsed = now - last_backup_dt
            if elapsed.total_seconds() < threshold_minutes * 60:
                print(f"Last backup was {elapsed.total_seconds()/60:.1f} min ago (threshold: {threshold_minutes})")
                return True

    # Verify database before backup
    if not verify_database():
        return False

    # Encrypt database to memory
    print("Encrypting database...")
    encrypted_data = encrypt_database_to_memory(passphrase)
    if encrypted_data is None:
        return False
    print(f"Database encrypted successfully: {len(encrypted_data)} bytes")

    # Generate timestamp
    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
    timestamp_bytes = timestamp.encode('utf-8')

    # Upload both files to HuggingFace. The timestamp goes last so that a
    # failed database upload never refreshes the recorded backup time.
    try:
        api = HfApi()
        api.upload_file(
            path_or_fileobj=BytesIO(encrypted_data),
            path_in_repo=REPO_DB_GPG_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        api.upload_file(
            path_or_fileobj=BytesIO(timestamp_bytes),
            path_in_repo=REPO_TIMESTAMP_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        print("Backup completed successfully!")
        return True
    except Exception as e:
        print(f"Error uploading to HuggingFace: {e}")
        return False
if __name__ == "__main__":
    # Exit status mirrors the backup outcome: 0 on success, 1 on failure.
    sys.exit(0 if backup_db() else 1)