# ─────────────────────────────────────────────────────────────
# 📁 PATH: sync.py (root HF Space repo)
# Updated for Free Models - No API Key Leakage
# ─────────────────────────────────────────────────────────────
import os
import sys
import tarfile
import time
from datetime import datetime

from huggingface_hub import HfApi, hf_hub_download

api = HfApi()
repo_id = os.getenv("HF_DATASET")
token = os.getenv("HF_TOKEN")

# Backup files
MAIN_BACKUP = "openclaw_backup.tar.gz"
BROWSER_BACKUP = "browser_backup.tar.gz"

# Items to skip from main backup (these are recreated or too large)
SKIP_ITEMS = {
    "openclaw.json",   # Recreated from env vars on each start
    "browsers",        # Backed up separately (large file)
    "wechat-data",     # Can be large, optional
    "cache",           # Temporary cache
    ".cache",          # Hidden cache
}

# Items to always backup (core data)
CORE_ITEMS = {
    "sessions",        # Chat history
    "workspace",       # User files and memory
    "agents",          # Agent configurations
    "memory",          # Long-term memory
    "credentials",     # But only if they don't contain keys
}


def should_skip(item_name: str) -> bool:
    """Check if an item should be skipped from backup.

    Skips anything in SKIP_ITEMS, hidden entries (dot-prefixed) and
    temporary files (``*.tmp``).
    """
    return (
        item_name in SKIP_ITEMS
        or item_name.startswith('.')
        or item_name.endswith('.tmp')
    )


def _safe_extractall(archive_path: str, dest: str) -> None:
    """Extract a ``.tar.gz`` archive into *dest*, refusing path traversal.

    Downloaded archives are untrusted input: a crafted member name like
    ``../../etc/passwd`` must never escape *dest*.  On Python 3.12+ the
    ``filter="data"`` argument (PEP 706) enforces this natively; on older
    versions we validate each member's resolved path by hand.

    Raises:
        RuntimeError: if a member would extract outside *dest* (pre-3.12 path).
    """
    with tarfile.open(archive_path, "r:gz") as tar:
        try:
            tar.extractall(path=dest, filter="data")
        except TypeError:
            # Pre-3.12 tarfile has no `filter=` kwarg — check members manually.
            dest_root = os.path.realpath(dest)
            for member in tar.getmembers():
                target = os.path.realpath(os.path.join(dest, member.name))
                if target != dest_root and not target.startswith(dest_root + os.sep):
                    raise RuntimeError(f"Unsafe path in archive: {member.name}")
            tar.extractall(path=dest)


def restore():
    """Restore from latest backup on startup.

    Best-effort: a missing dataset, missing credentials or absent backup
    file is normal on first deploy, so failures are reported but never
    raised to the caller.
    """
    if not repo_id or not token:
        print("⚠️ Skip Restore: HF_DATASET or HF_TOKEN not configured")
        print("   → Fresh installation (normal for first deploy)")
        return

    # Restore main backup (sessions, workspace, memory)
    try:
        print(f"📥 Downloading {MAIN_BACKUP} from {repo_id}...")
        path = hf_hub_download(
            repo_id=repo_id,
            filename=MAIN_BACKUP,
            repo_type="dataset",
            token=token,
        )
        _safe_extractall(path, "/root/.openclaw/")
        print(f"✅ Restored from {MAIN_BACKUP}")
    except Exception as e:
        print(f"ℹ️ No previous backup found: {e}")
        print("   → This is normal for first deployment")

    # Restore browser binaries (if they exist in dataset)
    try:
        print("📥 Checking for browser backup...")
        browser_path = hf_hub_download(
            repo_id=repo_id,
            filename=BROWSER_BACKUP,
            repo_type="dataset",
            token=token,
        )
        _safe_extractall(browser_path, "/root/.openclaw/")
        print("✅ Browser binaries restored")
    except Exception:
        print("ℹ️ No browser backup found - will install fresh if needed")


def backup():
    """Backup current state (runs every hour).

    Two archives are produced:
      1. Core data (sessions, workspace, memory) — uploaded every run.
      2. Browser binaries — uploaded only once (skipped when the archive
         already exists in the dataset), since they are large and static.

    Local archives are deleted after a successful upload to avoid
    accumulating ~100MB+ files in the container between runs.
    """
    if not repo_id or not token:
        print("⚠️ Skip Backup: HF_DATASET or HF_TOKEN not configured")
        return

    base_dir = "/root/.openclaw"

    # ── Backup 1: Core data (sessions, workspace, memory) ──────
    if not os.path.exists(base_dir):
        print(f"⚠️ {base_dir} does not exist - nothing to backup")
        return

    try:
        print("📦 Creating backup archive...")
        items_backed_up = []
        items_skipped = []

        with tarfile.open(MAIN_BACKUP, "w:gz") as tar:
            for item in os.listdir(base_dir):
                if should_skip(item):
                    items_skipped.append(item)
                    continue
                full_path = os.path.join(base_dir, item)
                # Guard against entries vanishing between listdir and add.
                if os.path.exists(full_path):
                    tar.add(full_path, arcname=item)
                    items_backed_up.append(item)

        print(f"   ✅ Backed up: {', '.join(items_backed_up)}")
        if items_skipped:
            print(f"   ⏭️ Skipped: {', '.join(items_skipped)}")

        # Upload to Hugging Face Dataset
        print(f"📤 Uploading {MAIN_BACKUP} to {repo_id}...")
        api.upload_file(
            path_or_fileobj=MAIN_BACKUP,
            path_in_repo=MAIN_BACKUP,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
        # Remove the local archive once it is safely in the dataset.
        os.remove(MAIN_BACKUP)
        print(f"✅ Backup complete - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    except Exception as e:
        print(f"❌ Backup error: {e}")

    # ── Backup 2: Browser binaries (only once, not every time) ──
    try:
        browsers_dir = "/root/.openclaw/browsers"
        if not os.path.exists(browsers_dir):
            return

        # Check if browser backup already exists in dataset
        try:
            hf_hub_download(
                repo_id=repo_id,
                filename=BROWSER_BACKUP,
                repo_type="dataset",
                token=token,
            )
            print(" Browser backup already exists - skipping upload")
        except Exception:
            # First time - upload browser binaries
            print("📦 First time: uploading browser binaries (~150MB)...")
            with tarfile.open(BROWSER_BACKUP, "w:gz") as tar:
                tar.add(browsers_dir, arcname="browsers")
            api.upload_file(
                path_or_fileobj=BROWSER_BACKUP,
                path_in_repo=BROWSER_BACKUP,
                repo_id=repo_id,
                repo_type="dataset",
                token=token,
            )
            os.remove(BROWSER_BACKUP)
            print("✅ Browser backup uploaded (will not upload again)")
    except Exception as e:
        print(f"⚠️ Browser backup skipped: {e}")


def list_backups():
    """List available ``.tar.gz`` backups in the configured dataset."""
    if not repo_id or not token:
        print("⚠️ HF_DATASET or HF_TOKEN not configured")
        return

    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
        backup_files = [f for f in files if f.endswith('.tar.gz')]
        if backup_files:
            print(f"\n📋 Available backups in {repo_id}:")
            for f in sorted(backup_files, reverse=True):
                print(f"   - {f}")
        else:
            print(f"📋 No backups found in {repo_id}")
    except Exception as e:
        print(f"❌ Error listing backups: {e}")


if __name__ == "__main__":
    # CLI dispatch: `python sync.py [backup|list|restore]`, default = restore.
    if len(sys.argv) > 1:
        if sys.argv[1] == "backup":
            backup()
        elif sys.argv[1] == "list":
            list_backups()
        elif sys.argv[1] == "restore":
            restore()
    else:
        restore()