#!/usr/bin/env python3
"""Back up the application SQLite database to a Hugging Face Space.

The script verifies the database, encrypts it symmetrically with GPG
(AES256) into memory, and uploads the ciphertext plus a timestamp file to
the Space named by ``SPACE_ID``.

Environment variables read:
    DATA_DIR: directory containing ``webui.db`` (default
        ``/app/backend/data``).
    BACKUP_PASSPHRASE: symmetric passphrase handed to GPG (required).
    HF_TOKEN: Hugging Face access token (required).
    SPACE_ID: repo id of the target Space (required).
    BACKUP_THRESHOLD_MINUTES: minimum age of the previous backup before a
        new one is made (default 120; a value <= 0 disables the check).

Exit status is 0 when the backup succeeded or was skipped as recent,
1 otherwise.
"""

import datetime
import os
import sqlite3
import subprocess
import sys
import tempfile
from contextlib import closing
from io import BytesIO
from pathlib import Path

from huggingface_hub import HfApi, HfFileSystem, hf_hub_download

# Set up path to include the application module
SCRIPT_DIR = Path(__file__).parent.resolve()
BACKEND_DIR = SCRIPT_DIR.parent
sys.path.append(str(BACKEND_DIR))

# Database path (actual application database)
DATA_DIR = os.environ.get("DATA_DIR", "/app/backend/data")
DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")

# Hugging Face repo paths (virtual paths in HF storage)
REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"

# Fallback used when BACKUP_THRESHOLD_MINUTES is unset or not an integer.
_DEFAULT_THRESHOLD_MINUTES = 120


def verify_database():
    """Verify database integrity.

    Returns:
        True when the file at ``DB_FILE_PATH`` exists, ``PRAGMA
        integrity_check`` reports ``ok`` and at least one table is present;
        False otherwise (including on any error while checking).
    """
    if not os.path.exists(DB_FILE_PATH):
        print(f"Database file not found at: {DB_FILE_PATH}")
        return False

    try:
        # sqlite3's own context manager only scopes a transaction and leaves
        # the connection open; closing() guarantees the handle is released.
        with closing(sqlite3.connect(DB_FILE_PATH)) as conn:
            cursor = conn.cursor()
            cursor.execute("PRAGMA integrity_check;")
            result = cursor.fetchone()[0]
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()

        if result.lower() == "ok" and len(tables) > 0:
            print(f"Database verified: {len(tables)} tables found")
            return True

        print("Database verification failed")
        return False
    except Exception as e:
        # Best-effort check: any failure means "do not back this file up".
        print(f"Database verification error: {e}")
        return False


def encrypt_database_to_memory(passphrase):
    """Encrypt database directly to a memory buffer.

    Runs ``gpg -c`` (symmetric, AES256) on ``DB_FILE_PATH`` with the
    ciphertext written to stdout and captured in memory.

    Args:
        passphrase: symmetric passphrase as ``str``. It must not contain a
            newline, because GPG reads only the first line from the
            passphrase file descriptor.

    Returns:
        The encrypted bytes, or None when encryption failed.
    """
    if "\n" in passphrase:
        # Refuse instead of silently encrypting with a truncated passphrase
        # that the restore side would not be able to reproduce.
        print("Encryption error: passphrase must not contain newline characters")
        return None

    try:
        # Create a secure temporary directory for GPG
        with tempfile.TemporaryDirectory(prefix='gpg_home_') as gpg_home:
            os.chmod(gpg_home, 0o700)

            encrypt_cmd = [
                "gpg",
                "--batch",
                "--yes",
                "--homedir", gpg_home,
                "--pinentry-mode", "loopback",
                # Read the passphrase from stdin rather than argv so it is
                # not visible in the process list.
                "--passphrase-fd", "0",
                "-c",
                "--cipher-algo", "AES256",
                "-o", "-",  # Output to stdout
                DB_FILE_PATH,
            ]

            # check=True raises CalledProcessError on a non-zero exit, so no
            # separate returncode test is needed afterwards.
            result = subprocess.run(
                encrypt_cmd,
                input=passphrase.encode("utf-8") + b"\n",
                capture_output=True,
                check=True,
            )
            return result.stdout

    except subprocess.CalledProcessError as e:
        # errors="replace": a decode failure here would otherwise escape
        # this handler as an unhandled exception.
        stderr_text = (e.stderr or b"").decode("utf-8", errors="replace")
        print(f"GPG process error: {stderr_text}")
        return None
    except Exception as e:
        print(f"Encryption error: {e}")
        return None


def get_last_backup_time(repo_id, hf_token):
    """Get timestamp of last backup from HuggingFace.

    Args:
        repo_id: id of the Space repo holding the backup files.
        hf_token: Hugging Face access token.

    Returns:
        The ``datetime`` parsed from ``REPO_TIMESTAMP_FILE``, or None when
        the file is absent from the repo or any error occurs.
    """
    try:
        api = HfApi()
        files = api.list_repo_files(
            repo_id=repo_id,
            repo_type="space",
            token=hf_token,
        )
        if REPO_TIMESTAMP_FILE not in files:
            return None

        tmp_file = hf_hub_download(
            repo_id=repo_id,
            repo_type="space",
            filename=REPO_TIMESTAMP_FILE,
            token=hf_token,
        )
        with open(tmp_file, "r", encoding="utf-8") as f:
            stamp_str = f.read().strip()
        return datetime.datetime.fromisoformat(stamp_str)
    except Exception as e:
        # Returning None makes the caller proceed with a fresh backup.
        print(f"Error getting last backup time: {e}")
        return None


def _read_threshold_minutes():
    """Return BACKUP_THRESHOLD_MINUTES as an int, falling back to the default."""
    raw_value = os.environ.get(
        "BACKUP_THRESHOLD_MINUTES", str(_DEFAULT_THRESHOLD_MINUTES)
    )
    try:
        return int(raw_value)
    except ValueError:
        print(
            f"Invalid BACKUP_THRESHOLD_MINUTES value {raw_value!r}; "
            f"using default of {_DEFAULT_THRESHOLD_MINUTES}"
        )
        return _DEFAULT_THRESHOLD_MINUTES


def backup_db():
    """Main backup function using streaming approach.

    Returns:
        True when a backup was uploaded, or when the previous backup is
        newer than the configured threshold and the run was skipped.
        False when configuration is missing or verification, encryption or
        upload failed.
    """
    # Validate environment
    passphrase = os.environ.get("BACKUP_PASSPHRASE")
    hf_token = os.environ.get("HF_TOKEN")
    space_id = os.environ.get("SPACE_ID")

    if not all([passphrase, hf_token, space_id]):
        print("Error: Missing required environment variables")
        return False

    # Check backup threshold
    threshold_minutes = _read_threshold_minutes()
    if threshold_minutes > 0:
        last_backup_dt = get_last_backup_time(space_id, hf_token)
        if last_backup_dt is not None:
            now = datetime.datetime.now(datetime.timezone.utc)
            # A stored timestamp without an offset is interpreted as UTC so
            # it can be subtracted from the aware "now".
            if last_backup_dt.tzinfo is None:
                last_backup_dt = last_backup_dt.replace(
                    tzinfo=datetime.timezone.utc
                )
            elapsed = now - last_backup_dt
            if elapsed.total_seconds() < threshold_minutes * 60:
                print(f"Last backup was {elapsed.total_seconds()/60:.1f} min ago (threshold: {threshold_minutes})")
                return True

    # Verify database before backup
    if not verify_database():
        return False

    # Encrypt database to memory
    print("Encrypting database...")
    encrypted_data = encrypt_database_to_memory(passphrase)
    if encrypted_data is None:
        return False

    print(f"Database encrypted successfully: {len(encrypted_data)} bytes")

    # Generate timestamp
    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
    timestamp_bytes = timestamp.encode('utf-8')

    # Upload both files to HuggingFace. The timestamp goes second so that a
    # failed data upload does not record a backup time.
    try:
        api = HfApi()
        api.upload_file(
            path_or_fileobj=BytesIO(encrypted_data),
            path_in_repo=REPO_DB_GPG_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        api.upload_file(
            path_or_fileobj=BytesIO(timestamp_bytes),
            path_in_repo=REPO_TIMESTAMP_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        print("Backup completed successfully!")
        return True
    except Exception as e:
        print(f"Error uploading to HuggingFace: {e}")
        return False


if __name__ == "__main__":
    success = backup_db()
    sys.exit(0 if success else 1)