Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| import os | |
| import sys | |
| import subprocess | |
| import datetime | |
| import sqlite3 | |
| from pathlib import Path | |
| from io import BytesIO | |
| from huggingface_hub import HfApi, HfFileSystem, hf_hub_download | |
| import tempfile | |
# Put the directory above this script's folder on sys.path so modules
# located there can be imported.
SCRIPT_DIR = Path(__file__).parent.resolve()
BACKEND_DIR = SCRIPT_DIR.parent
sys.path.append(os.fspath(BACKEND_DIR))

# Location of the live application database. The directory can be
# overridden through the DATA_DIR environment variable.
DATA_DIR = os.getenv("DATA_DIR", "/app/backend/data")
DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")

# Paths of the backup artifacts inside the Hugging Face repository.
REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
def verify_database():
    """Check that the application database exists, is intact and has tables.

    Runs SQLite's ``PRAGMA integrity_check`` against ``DB_FILE_PATH`` and
    lists the tables recorded in ``sqlite_master``.

    Returns:
        bool: ``True`` when the integrity check reports ``ok`` and at least
        one table exists. ``False`` when the file is missing, the check
        fails, no tables are present, or any exception is raised while
        inspecting the file (the error is printed, not propagated).
    """
    if not os.path.exists(DB_FILE_PATH):
        print(f"Database file not found at: {DB_FILE_PATH}")
        return False
    try:
        conn = sqlite3.connect(DB_FILE_PATH)
        try:
            cursor = conn.cursor()
            cursor.execute("PRAGMA integrity_check;")
            result = cursor.fetchone()[0]
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            tables = cursor.fetchall()
        finally:
            # Using the connection as a context manager only commits or
            # rolls back; it never closes the handle, so close explicitly.
            conn.close()
    except Exception as e:
        print(f"Database verification error: {e}")
        return False
    if result.lower() == "ok" and tables:
        print(f"Database verified: {len(tables)} tables found")
        return True
    print("Database verification failed")
    return False
def encrypt_database_to_memory(passphrase):
    """Symmetrically encrypt the database with GPG and return the ciphertext.

    Invokes ``gpg -c --cipher-algo AES256`` on ``DB_FILE_PATH`` with a
    throwaway GPG home directory and captures the encrypted output from
    gpg's stdout instead of writing it to disk.

    Args:
        passphrase: Passphrase (``str``) used for symmetric encryption. It is
            handed to gpg over stdin rather than as a command-line argument,
            so it does not show up in the process list.

    Returns:
        bytes | None: The encrypted data, or ``None`` when gpg fails, cannot
        be started, or the passphrase contains a newline. Errors are printed,
        not raised.
    """
    # gpg reads only the first line from the passphrase descriptor, so a
    # multi-line passphrase would be silently truncated; refuse it instead.
    if "\n" in passphrase:
        print("Encryption error: passphrase must not contain a newline")
        return None
    try:
        # Create a secure temporary directory for GPG
        with tempfile.TemporaryDirectory(prefix='gpg_home_') as gpg_home:
            os.chmod(gpg_home, 0o700)
            encrypt_cmd = [
                "gpg",
                "--batch",
                "--yes",
                "--homedir", gpg_home,
                "--pinentry-mode", "loopback",
                "--passphrase-fd", "0",  # Read the passphrase from stdin
                "-c",
                "--cipher-algo", "AES256",
                "-o", "-",  # Output to stdout
                DB_FILE_PATH,
            ]
            # check=True turns any non-zero exit status into
            # CalledProcessError, handled below.
            result = subprocess.run(
                encrypt_cmd,
                input=passphrase.encode("utf-8") + b"\n",
                capture_output=True,
                check=True,
            )
            return result.stdout
    except subprocess.CalledProcessError as e:
        # stderr may not be valid UTF-8; never let the error report itself fail.
        print(f"GPG process error: {e.stderr.decode(errors='replace')}")
        return None
    except Exception as e:
        print(f"Encryption error: {e}")
        return None
def get_last_backup_time(repo_id, hf_token):
    """Fetch the timestamp of the most recent backup stored in the repo.

    Args:
        repo_id: Identifier of the Hugging Face Space to inspect.
        hf_token: Access token passed to the Hugging Face API calls.

    Returns:
        datetime.datetime | None: The value parsed from the ISO-format
        timestamp file, or ``None`` when the file is not in the repo or any
        error occurs (the error is printed).
    """
    try:
        repo_files = HfApi().list_repo_files(
            repo_id=repo_id,
            repo_type="space",
            token=hf_token,
        )
        if REPO_TIMESTAMP_FILE in repo_files:
            local_copy = hf_hub_download(
                repo_id=repo_id,
                repo_type="space",
                filename=REPO_TIMESTAMP_FILE,
                token=hf_token,
            )
            stamp_text = Path(local_copy).read_text(encoding="utf-8")
            return datetime.datetime.fromisoformat(stamp_text.strip())
        # No timestamp file yet: no previous backup is recorded.
        return None
    except Exception as e:
        print(f"Error getting last backup time: {e}")
        return None
def backup_db():
    """Encrypt the application database and upload it to the Hugging Face Space.

    Configuration is read from the environment:

    * ``BACKUP_PASSPHRASE``, ``HF_TOKEN``, ``SPACE_ID`` -- required.
    * ``BACKUP_THRESHOLD_MINUTES`` -- optional integer, default 120. When
      positive, the backup is skipped if the last recorded backup is more
      recent than this many minutes. Zero or negative disables the check.

    Returns:
        bool: ``True`` when the backup was uploaded or skipped because a
        recent one exists; ``False`` on missing or invalid configuration,
        failed database verification, failed encryption, or upload error.
    """
    # Validate environment
    passphrase = os.environ.get("BACKUP_PASSPHRASE")
    hf_token = os.environ.get("HF_TOKEN")
    space_id = os.environ.get("SPACE_ID")
    if not all([passphrase, hf_token, space_id]):
        print("Error: Missing required environment variables")
        return False

    # A malformed threshold is a configuration error: report it cleanly
    # instead of dying with an uncaught ValueError traceback.
    raw_threshold = os.environ.get("BACKUP_THRESHOLD_MINUTES", 120)
    try:
        threshold_minutes = int(raw_threshold)
    except ValueError:
        print(f"Error: BACKUP_THRESHOLD_MINUTES must be an integer, got {raw_threshold!r}")
        return False

    # Check backup threshold
    if threshold_minutes > 0:
        last_backup_dt = get_last_backup_time(space_id, hf_token)
        if last_backup_dt is not None:
            now = datetime.datetime.now(datetime.timezone.utc)
            # Naive timestamps are treated as UTC so they can be compared
            # with the timezone-aware "now".
            if not last_backup_dt.tzinfo:
                last_backup_dt = last_backup_dt.replace(tzinfo=datetime.timezone.utc)
            elapsed = now - last_backup_dt
            if elapsed.total_seconds() < threshold_minutes * 60:
                print(f"Last backup was {elapsed.total_seconds()/60:.1f} min ago (threshold: {threshold_minutes})")
                return True

    # Verify database before backup
    if not verify_database():
        return False

    # Encrypt database to memory
    print("Encrypting database...")
    encrypted_data = encrypt_database_to_memory(passphrase)
    if encrypted_data is None:
        return False
    print(f"Database encrypted successfully: {len(encrypted_data)} bytes")

    # Generate timestamp
    timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
    timestamp_bytes = timestamp.encode('utf-8')

    # Upload both files to HuggingFace. The timestamp goes second so it is
    # only written after the encrypted database upload has succeeded.
    try:
        api = HfApi()
        api.upload_file(
            path_or_fileobj=BytesIO(encrypted_data),
            path_in_repo=REPO_DB_GPG_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        api.upload_file(
            path_or_fileobj=BytesIO(timestamp_bytes),
            path_in_repo=REPO_TIMESTAMP_FILE,
            repo_id=space_id,
            repo_type="space",
            token=hf_token,
        )
        print("Backup completed successfully!")
        return True
    except Exception as e:
        print(f"Error uploading to HuggingFace: {e}")
        return False
if __name__ == "__main__":
    # Exit status mirrors the backup outcome: 0 on success, 1 on failure.
    sys.exit(0 if backup_db() else 1)