# yamiclaws's picture
# Create sync.py
# 4ee904e verified
# ─────────────────────────────────────────────────────────────
# πŸ“ PATH: sync.py (root HF Space repo)
# Updated for Free Models - No API Key Leakage
# ─────────────────────────────────────────────────────────────
import os
import sys
import tarfile
import time
from datetime import datetime
from huggingface_hub import HfApi, hf_hub_download
# Hugging Face Hub client used for all dataset uploads/downloads below.
api = HfApi()
# Backup target configuration; both must be set or backup/restore are skipped.
repo_id = os.getenv("HF_DATASET")  # dataset repo id used as backup storage
token = os.getenv("HF_TOKEN")      # auth token for that dataset repo
# Backup files
MAIN_BACKUP = "openclaw_backup.tar.gz"    # core data archive (sessions, workspace, ...)
BROWSER_BACKUP = "browser_backup.tar.gz"  # browser binaries archive (uploaded once)
# Items to skip from main backup (these are recreated or too large)
SKIP_ITEMS = {
    "openclaw.json",  # Recreated from env vars on each start
    "browsers",       # Backed up separately (large file)
    "wechat-data",    # Can be large, optional
    "cache",          # Temporary cache
    ".cache",         # Hidden cache (also excluded by should_skip's dot-prefix rule)
}
# Items to always backup (core data)
# NOTE(review): CORE_ITEMS is not referenced anywhere in this file; backup()
# archives everything under the base dir that should_skip() does not exclude.
CORE_ITEMS = {
    "sessions",     # Chat history
    "workspace",    # User files and memory
    "agents",       # Agent configurations
    "memory",       # Long-term memory
    "credentials",  # But only if they don't contain keys
}
def should_skip(item_name: str) -> bool:
    """Return True if *item_name* must be excluded from the main backup.

    Excluded: explicit entries in SKIP_ITEMS, hidden entries (leading dot),
    and temporary files (``.tmp`` suffix).
    """
    if item_name in SKIP_ITEMS:
        return True
    if item_name.startswith('.'):
        return True
    return item_name.endswith('.tmp')
def restore():
"""Restore from latest backup on startup"""
if not repo_id or not token:
print("⚠️ Skip Restore: HF_DATASET or HF_TOKEN not configured")
print(" β†’ Fresh installation (normal for first deploy)")
return
# Restore main backup (sessions, workspace, memory)
try:
print(f"πŸ“₯ Downloading {MAIN_BACKUP} from {repo_id}...")
path = hf_hub_download(
repo_id=repo_id,
filename=MAIN_BACKUP,
repo_type="dataset",
token=token
)
with tarfile.open(path, "r:gz") as tar:
tar.extractall(path="/root/.openclaw/")
print(f"βœ… Restored from {MAIN_BACKUP}")
except Exception as e:
print(f"ℹ️ No previous backup found: {e}")
print(" β†’ This is normal for first deployment")
# Restore browser binaries (if they exist in dataset)
try:
print(f"πŸ“₯ Checking for browser backup...")
browser_path = hf_hub_download(
repo_id=repo_id,
filename=BROWSER_BACKUP,
repo_type="dataset",
token=token
)
with tarfile.open(browser_path, "r:gz") as tar:
tar.extractall(path="/root/.openclaw/")
print(f"βœ… Browser binaries restored")
except Exception:
print(f"ℹ️ No browser backup found - will install fresh if needed")
def backup():
    """Backup current state (runs every hour).

    Archives everything under /root/.openclaw that should_skip() does not
    exclude into MAIN_BACKUP and uploads it to the configured HF dataset.
    Browser binaries are archived and uploaded only once, since they do
    not change after install. All failures are reported, never raised.
    """
    if not repo_id or not token:
        print("⚠️ Skip Backup: HF_DATASET or HF_TOKEN not configured")
        return
    base_dir = "/root/.openclaw"
    # ── Backup 1: Core data (sessions, workspace, memory) ──────
    if not os.path.exists(base_dir):
        print(f"⚠️ {base_dir} does not exist - nothing to backup")
        return
    try:
        print(f"πŸ“¦ Creating backup archive...")
        items_backed_up = []
        items_skipped = []
        with tarfile.open(MAIN_BACKUP, "w:gz") as tar:
            for item in os.listdir(base_dir):
                if should_skip(item):
                    items_skipped.append(item)
                    continue
                full_path = os.path.join(base_dir, item)
                # Guard against entries vanishing between listdir and add.
                if os.path.exists(full_path):
                    tar.add(full_path, arcname=item)
                    items_backed_up.append(item)
        # Archive is closed here — safe to report and upload.
        if items_backed_up:
            print(f" βœ… Backed up: {', '.join(items_backed_up)}")
        if items_skipped:
            print(f" ⏭️ Skipped: {', '.join(items_skipped)}")
        # Upload to Hugging Face Dataset
        print(f"πŸ“€ Uploading {MAIN_BACKUP} to {repo_id}...")
        api.upload_file(
            path_or_fileobj=MAIN_BACKUP,
            path_in_repo=MAIN_BACKUP,
            repo_id=repo_id,
            repo_type="dataset",
            token=token
        )
        print(f"βœ… Backup complete - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    except Exception as e:
        print(f"❌ Backup error: {e}")
    # ── Backup 2: Browser binaries (only once, not every time) ──
    try:
        browsers_dir = "/root/.openclaw/browsers"
        if not os.path.exists(browsers_dir):
            return
        # Cheap metadata probe. The previous implementation called
        # hf_hub_download here, which pulled the full ~150MB archive
        # every hour just to test for its existence.
        if api.file_exists(
            repo_id=repo_id,
            filename=BROWSER_BACKUP,
            repo_type="dataset",
            token=token
        ):
            print(f" Browser backup already exists - skipping upload")
        else:
            # First time - upload browser binaries
            print(f"πŸ“¦ First time: uploading browser binaries (~150MB)...")
            with tarfile.open(BROWSER_BACKUP, "w:gz") as tar:
                tar.add(browsers_dir, arcname="browsers")
            api.upload_file(
                path_or_fileobj=BROWSER_BACKUP,
                path_in_repo=BROWSER_BACKUP,
                repo_id=repo_id,
                repo_type="dataset",
                token=token
            )
            print(f"βœ… Browser backup uploaded (will not upload again)")
    except Exception as e:
        print(f"⚠️ Browser backup skipped: {e}")
def list_backups():
    """Print every .tar.gz backup stored in the configured HF dataset."""
    if not repo_id or not token:
        print("⚠️ HF_DATASET or HF_TOKEN not configured")
        return
    try:
        all_files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
        archives = [name for name in all_files if name.endswith('.tar.gz')]
        if not archives:
            print(f"πŸ“‹ No backups found in {repo_id}")
            return
        print(f"\nπŸ“‹ Available backups in {repo_id}:")
        for name in sorted(archives, reverse=True):
            print(f" - {name}")
    except Exception as e:
        print(f"❌ Error listing backups: {e}")
if __name__ == "__main__":
    # Optional CLI sub-command; no arguments means "restore" (startup path).
    # An unrecognized sub-command is deliberately a no-op.
    if len(sys.argv) > 1:
        dispatch = {
            "backup": backup,
            "list": list_backups,
            "restore": restore,
        }
        action = dispatch.get(sys.argv[1])
        if action is not None:
            action()
    else:
        restore()