|
|
|
|
|
import os |
|
|
import sys |
|
|
import subprocess |
|
|
import datetime |
|
|
import sqlite3 |
|
|
from pathlib import Path |
|
|
from io import BytesIO |
|
|
from huggingface_hub import HfApi, HfFileSystem, hf_hub_download |
|
|
import tempfile |
|
|
|
|
|
|
|
|
# Directory containing this script, and its parent. The parent is appended to
# sys.path so that modules located there can be imported.
SCRIPT_DIR = Path(__file__).parent.resolve()
BACKEND_DIR = SCRIPT_DIR.parent
sys.path.append(str(BACKEND_DIR))

# Location of the SQLite database on disk. DATA_DIR can be overridden through
# the environment variable of the same name.
DATA_DIR = os.getenv("DATA_DIR", "/app/backend/data")
DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")

# Paths (inside the remote repository) of the encrypted database and of the
# text file holding the time of the last backup.
REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
|
|
|
|
|
|
|
|
def verify_database():
    """Verify database integrity.

    Runs ``PRAGMA integrity_check`` against the SQLite file at
    ``DB_FILE_PATH`` and confirms that it contains at least one table.

    Returns:
        bool: True when the integrity check reports "ok" and at least one
        table exists. False when the file is missing, the check fails, no
        table is found, or any error occurs while inspecting the file.
    """
    if not os.path.exists(DB_FILE_PATH):
        print(f"Database file not found at: {DB_FILE_PATH}")
        return False

    try:
        conn = sqlite3.connect(DB_FILE_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("PRAGMA integrity_check;")
            result = cursor.fetchone()[0]
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
        finally:
            # Using the connection as a context manager only commits or
            # rolls back; it does not close. Close explicitly so the file
            # handle is released before the database is read for encryption.
            conn.close()

        if result.lower() == "ok" and tables:
            print(f"Database verified: {len(tables)} tables found")
            return True

        print("Database verification failed")
        return False
    except Exception as e:
        # Best-effort check: any failure (e.g. the file is not a database)
        # is reported and treated as "not verified".
        print(f"Database verification error: {e}")
        return False
|
|
|
|
|
|
|
|
def encrypt_database_to_memory(passphrase):
    """Encrypt database directly to a memory buffer.

    Runs ``gpg`` in symmetric mode (``-c``) with the AES256 cipher on the
    file at ``DB_FILE_PATH``, using a throwaway GnuPG home directory, and
    captures the ciphertext from the process's standard output.

    Args:
        passphrase: Passphrase string used for the symmetric encryption.

    Returns:
        bytes | None: The encrypted data, or None when gpg exits with a
        non-zero status or any other error occurs (the error is printed).
    """
    try:
        with tempfile.TemporaryDirectory(prefix='gpg_home_') as gpg_home:
            os.chmod(gpg_home, 0o700)

            encrypt_cmd = [
                "gpg",
                "--batch",
                "--yes",
                "--homedir", gpg_home,
                "--pinentry-mode", "loopback",
            ]

            if "\n" in passphrase or "\r" in passphrase:
                # gpg reads only the first line from a passphrase fd, so a
                # passphrase containing a line break cannot be sent that way
                # without changing the effective key. Keep the original
                # command-line form for that rare case.
                encrypt_cmd += ["--passphrase", passphrase]
                stdin_data = None
            else:
                # Feed the passphrase through stdin rather than argv so it
                # is not visible in the process list.
                encrypt_cmd += ["--passphrase-fd", "0"]
                stdin_data = os.fsencode(passphrase) + b"\n"

            encrypt_cmd += [
                "-c",
                "--cipher-algo", "AES256",
                "-o", "-",
                DB_FILE_PATH,
            ]

            # check=True raises CalledProcessError on a non-zero exit status,
            # so no separate returncode test is needed afterwards.
            result = subprocess.run(
                encrypt_cmd,
                input=stdin_data,
                capture_output=True,
                check=True,
            )
            return result.stdout
    except subprocess.CalledProcessError as e:
        # Decode leniently: a decoding error raised inside this handler would
        # otherwise escape the function instead of yielding None.
        stderr_text = e.stderr.decode(errors="replace") if e.stderr else ""
        print(f"GPG process error: {stderr_text}")
        return None
    except Exception as e:
        print(f"Encryption error: {e}")
        return None
|
|
|
|
|
|
|
|
def get_last_backup_time(repo_id, hf_token):
    """Look up when the last backup was recorded in the remote Space repo.

    Lists the files of the Space ``repo_id``; when ``REPO_TIMESTAMP_FILE`` is
    among them, downloads it and parses its stripped content as an ISO-8601
    timestamp.

    Args:
        repo_id: Identifier of the Space repository to inspect.
        hf_token: Token passed to the Hugging Face calls.

    Returns:
        datetime.datetime | None: The parsed timestamp, or None when the
        timestamp file is absent or any error occurs (the error is printed).
    """
    try:
        repo_files = HfApi().list_repo_files(
            repo_id=repo_id, repo_type="space", token=hf_token
        )
        if REPO_TIMESTAMP_FILE in repo_files:
            local_copy = hf_hub_download(
                filename=REPO_TIMESTAMP_FILE,
                repo_id=repo_id,
                repo_type="space",
                token=hf_token,
            )
            raw_stamp = Path(local_copy).read_text(encoding="utf-8").strip()
            return datetime.datetime.fromisoformat(raw_stamp)
        return None
    except Exception as err:
        print(f"Error getting last backup time: {err}")
        return None
|
|
|
|
|
|
|
|
def _get_backup_threshold_minutes(default=120):
    """Read BACKUP_THRESHOLD_MINUTES from the environment as an integer.

    Returns *default* when the variable is unset. When it is set to something
    that is not a valid integer, prints a warning and also returns *default*
    rather than letting ValueError abort the backup.
    """
    raw_value = os.environ.get("BACKUP_THRESHOLD_MINUTES")
    if raw_value is None:
        return default
    try:
        return int(raw_value)
    except ValueError:
        print(
            f"Invalid BACKUP_THRESHOLD_MINUTES value {raw_value!r}; "
            f"using default of {default} minutes"
        )
        return default


def backup_db():
    """Main backup function using streaming approach.

    Reads its configuration from the environment:

    * ``BACKUP_PASSPHRASE`` - passphrase used to encrypt the database.
    * ``HF_TOKEN`` - token passed to the Hugging Face calls.
    * ``SPACE_ID`` - Space repository that receives the backup.
    * ``BACKUP_THRESHOLD_MINUTES`` - optional; minimum age of the previous
      backup before a new one is made (default 120). A value of 0 or less
      disables the age check.

    The database is verified, encrypted in memory and uploaded together with
    a UTC ISO-8601 timestamp file.

    Returns:
        bool: True when the backup was uploaded or was skipped because the
        previous one is recent enough. False when configuration is missing,
        or verification, encryption or upload fails.
    """
    passphrase = os.environ.get("BACKUP_PASSPHRASE")
    hf_token = os.environ.get("HF_TOKEN")
    space_id = os.environ.get("SPACE_ID")

    if not all([passphrase, hf_token, space_id]):
        print("Error: Missing required environment variables")
        return False

    threshold_minutes = _get_backup_threshold_minutes()
    if threshold_minutes > 0:
        last_backup_dt = get_last_backup_time(space_id, hf_token)
        if last_backup_dt is not None:
            now = datetime.datetime.now(datetime.timezone.utc)
            # A naive stored timestamp is treated as UTC so it can be
            # subtracted from the timezone-aware "now".
            if not last_backup_dt.tzinfo:
                last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
            elapsed = now - last_backup_dt
            if elapsed.total_seconds() < threshold_minutes * 60:
                print(f"Last backup was {elapsed.total_seconds()/60:.1f} min ago (threshold: {threshold_minutes})")
                return True

    if not verify_database():
        return False

    print("Encrypting database...")
    encrypted_data = encrypt_database_to_memory(passphrase)
    if encrypted_data is None:
        return False
    print(f"Database encrypted successfully: {len(encrypted_data)} bytes")

    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
    timestamp_bytes = timestamp.encode('utf-8')

    try:
        api = HfApi()
        # Upload the database before the timestamp so that a failed database
        # upload never leaves a fresh timestamp behind.
        api.upload_file(
            path_or_fileobj=BytesIO(encrypted_data),
            path_in_repo=REPO_DB_GPG_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        api.upload_file(
            path_or_fileobj=BytesIO(timestamp_bytes),
            path_in_repo=REPO_TIMESTAMP_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        print("Backup completed successfully!")
        return True
    except Exception as e:
        print(f"Error uploading to HuggingFace: {e}")
        return False
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the backup when executed as a script; the process exit status is
    # 0 when backup_db() reports success and 1 otherwise.
    sys.exit(0 if backup_db() else 1)
|
|
|