# yamiclaws's picture
# Create sync.py
# 4ee904e verified
# ─────────────────────────────────────────────────────────────
# πŸ“ PATH: sync.py (root HF Space repo)
# Updated for Free Models - No API Key Leakage
# ─────────────────────────────────────────────────────────────
import os
import sys
import tarfile
import time
from datetime import datetime
from huggingface_hub import HfApi, hf_hub_download
# Hugging Face Hub client used for all dataset uploads/downloads below.
api = HfApi()
# Backup target configuration; both must be set or backup/restore are skipped.
repo_id = os.getenv("HF_DATASET")  # dataset repo id used as backup storage
token = os.getenv("HF_TOKEN")      # auth token for that dataset repo
# Backup files
MAIN_BACKUP = "openclaw_backup.tar.gz"    # core data archive (sessions, workspace, ...)
BROWSER_BACKUP = "browser_backup.tar.gz"  # browser binaries archive (uploaded once)
# Items to skip from main backup (these are recreated or too large)
SKIP_ITEMS = {
    "openclaw.json",  # Recreated from env vars on each start
    "browsers",       # Backed up separately (large file)
    "wechat-data",    # Can be large, optional
    "cache",          # Temporary cache
    ".cache",         # Hidden cache (also excluded by should_skip's dot-prefix rule)
}
# Items to always backup (core data)
# NOTE(review): CORE_ITEMS is not referenced anywhere in this file; backup()
# archives everything under the base dir that should_skip() does not exclude.
CORE_ITEMS = {
    "sessions",     # Chat history
    "workspace",    # User files and memory
    "agents",       # Agent configurations
    "memory",       # Long-term memory
    "credentials",  # But only if they don't contain keys
}
def should_skip(item_name: str) -> bool:
    """Return True if *item_name* must be excluded from the main backup.

    Excluded: explicit entries in SKIP_ITEMS, hidden entries (leading dot),
    and temporary files (``.tmp`` suffix).
    """
    if item_name in SKIP_ITEMS:
        return True
    if item_name.startswith('.'):
        return True
    return item_name.endswith('.tmp')
def restore():
"""Restore from latest backup on startup"""
if not repo_id or not token:
print("⚠️ Skip Restore: HF_DATASET or HF_TOKEN not configured")
print(" β†’ Fresh installation (normal for first deploy)")
return
# Restore main backup (sessions, workspace, memory)
try:
print(f"πŸ“₯ Downloading {MAIN_BACKUP} from {repo_id}...")
path = hf_hub_download(
repo_id=repo_id,
filename=MAIN_BACKUP,
repo_type="dataset",
token=token
)
with tarfile.open(path, "r:gz") as tar:
tar.extractall(path="/root/.openclaw/")
print(f"βœ… Restored from {MAIN_BACKUP}")
except Exception as e:
print(f"ℹ️ No previous backup found: {e}")
print(" β†’ This is normal for first deployment")
# Restore browser binaries (if they exist in dataset)
try:
print(f"πŸ“₯ Checking for browser backup...")
browser_path = hf_hub_download(
repo_id=repo_id,
filename=BROWSER_BACKUP,
repo_type="dataset",
token=token
)
with tarfile.open(browser_path, "r:gz") as tar:
tar.extractall(path="/root/.openclaw/")
print(f"βœ… Browser binaries restored")
except Exception:
print(f"ℹ️ No browser backup found - will install fresh if needed")
def backup():
    """Backup current state (runs every hour).

    Archives everything under /root/.openclaw that should_skip() does not
    exclude into MAIN_BACKUP and uploads it to the configured HF dataset.
    Browser binaries are archived and uploaded only once, since they do
    not change after install. All failures are reported, never raised.
    """
    if not repo_id or not token:
        print("⚠️ Skip Backup: HF_DATASET or HF_TOKEN not configured")
        return
    base_dir = "/root/.openclaw"
    # ── Backup 1: Core data (sessions, workspace, memory) ──────
    if not os.path.exists(base_dir):
        print(f"⚠️ {base_dir} does not exist - nothing to backup")
        return
    try:
        print(f"πŸ“¦ Creating backup archive...")
        items_backed_up = []
        items_skipped = []
        with tarfile.open(MAIN_BACKUP, "w:gz") as tar:
            for item in os.listdir(base_dir):
                if should_skip(item):
                    items_skipped.append(item)
                    continue
                full_path = os.path.join(base_dir, item)
                # Guard against entries vanishing between listdir and add.
                if os.path.exists(full_path):
                    tar.add(full_path, arcname=item)
                    items_backed_up.append(item)
        # Archive is closed here — safe to report and upload.
        if items_backed_up:
            print(f" βœ… Backed up: {', '.join(items_backed_up)}")
        if items_skipped:
            print(f" ⏭️ Skipped: {', '.join(items_skipped)}")
        # Upload to Hugging Face Dataset
        print(f"πŸ“€ Uploading {MAIN_BACKUP} to {repo_id}...")
        api.upload_file(
            path_or_fileobj=MAIN_BACKUP,
            path_in_repo=MAIN_BACKUP,
            repo_id=repo_id,
            repo_type="dataset",
            token=token
        )
        print(f"βœ… Backup complete - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    except Exception as e:
        print(f"❌ Backup error: {e}")
    # ── Backup 2: Browser binaries (only once, not every time) ──
    try:
        browsers_dir = "/root/.openclaw/browsers"
        if not os.path.exists(browsers_dir):
            return
        # Cheap metadata probe. The previous implementation called
        # hf_hub_download here, which pulled the full ~150MB archive
        # every hour just to test for its existence.
        if api.file_exists(
            repo_id=repo_id,
            filename=BROWSER_BACKUP,
            repo_type="dataset",
            token=token
        ):
            print(f" Browser backup already exists - skipping upload")
        else:
            # First time - upload browser binaries
            print(f"πŸ“¦ First time: uploading browser binaries (~150MB)...")
            with tarfile.open(BROWSER_BACKUP, "w:gz") as tar:
                tar.add(browsers_dir, arcname="browsers")
            api.upload_file(
                path_or_fileobj=BROWSER_BACKUP,
                path_in_repo=BROWSER_BACKUP,
                repo_id=repo_id,
                repo_type="dataset",
                token=token
            )
            print(f"βœ… Browser backup uploaded (will not upload again)")
    except Exception as e:
        print(f"⚠️ Browser backup skipped: {e}")
def list_backups():
    """Print every .tar.gz backup stored in the configured HF dataset."""
    if not repo_id or not token:
        print("⚠️ HF_DATASET or HF_TOKEN not configured")
        return
    try:
        all_files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
        archives = [name for name in all_files if name.endswith('.tar.gz')]
        if not archives:
            print(f"πŸ“‹ No backups found in {repo_id}")
            return
        print(f"\nπŸ“‹ Available backups in {repo_id}:")
        for name in sorted(archives, reverse=True):
            print(f" - {name}")
    except Exception as e:
        print(f"❌ Error listing backups: {e}")
if __name__ == "__main__":
    # Optional CLI sub-command; no arguments means "restore" (startup path).
    # An unrecognized sub-command is deliberately a no-op.
    if len(sys.argv) > 1:
        dispatch = {
            "backup": backup,
            "list": list_backups,
            "restore": restore,
        }
        action = dispatch.get(sys.argv[1])
        if action is not None:
            action()
    else:
        restore()