Spaces:
Runtime error
Runtime error
File size: 6,732 Bytes
4ee904e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | # βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
# π PATH: sync.py (root HF Space repo)
# Updated for Free Models - No API Key Leakage
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
import os
import sys
import tarfile
import time
from datetime import datetime
from huggingface_hub import HfApi, hf_hub_download
# Hugging Face client and backup target; credentials come only from the
# Space's environment variables (never hard-coded in the repo).
api = HfApi()
repo_id = os.getenv("HF_DATASET")  # dataset repo used as backup storage
token = os.getenv("HF_TOKEN")      # write token for that dataset
# Backup archive filenames (created in the working directory, then uploaded)
MAIN_BACKUP = "openclaw_backup.tar.gz"
BROWSER_BACKUP = "browser_backup.tar.gz"
# Items to skip from main backup (these are recreated or too large)
SKIP_ITEMS = {
"openclaw.json", # Recreated from env vars on each start
"browsers", # Backed up separately (large file)
"wechat-data", # Can be large, optional
"cache", # Temporary cache
".cache", # Hidden cache
}
# Items to always backup (core data).
# NOTE(review): CORE_ITEMS is not referenced anywhere in this file — the
# backup loop archives everything not excluded by should_skip(). Kept as
# documentation of intent; confirm whether it should drive the backup.
CORE_ITEMS = {
"sessions", # Chat history
"workspace", # User files and memory
"agents", # Agent configurations
"memory", # Long-term memory
"credentials", # But only if they don't contain keys
}
def should_skip(item_name: str) -> bool:
    """Return True when *item_name* must be excluded from the main backup.

    An item is skipped if it is explicitly listed in SKIP_ITEMS, is a
    hidden entry (leading dot), or is a temporary file (``.tmp`` suffix).
    """
    if item_name in SKIP_ITEMS:
        return True
    if item_name.startswith('.'):
        return True
    return item_name.endswith('.tmp')
def restore():
    """Restore application state from the latest backup on startup.

    Downloads and extracts two archives from the HF dataset into
    ``/root/.openclaw/``: the main data backup (sessions, workspace,
    memory) and, if present, the browser-binaries backup. A missing
    archive is treated as a fresh install, not an error. No-op when
    HF_DATASET/HF_TOKEN are not configured.
    """
    if not repo_id or not token:
        print("⚠️ Skip Restore: HF_DATASET or HF_TOKEN not configured")
        print("   → Fresh installation (normal for first deploy)")
        return
    # ── Restore 1: main backup (sessions, workspace, memory) ──────
    try:
        print(f"📥 Downloading {MAIN_BACKUP} from {repo_id}...")
        path = hf_hub_download(
            repo_id=repo_id,
            filename=MAIN_BACKUP,
            repo_type="dataset",
            token=token,
        )
        # NOTE(review): extractall() without a member filter is subject to
        # path traversal if the archive is tampered with (CVE-2007-4559);
        # on Python >= 3.12 pass filter="data" — confirm runtime version.
        with tarfile.open(path, "r:gz") as tar:
            tar.extractall(path="/root/.openclaw/")
        print(f"✅ Restored from {MAIN_BACKUP}")
    except Exception as e:
        # First deploy has no backup yet — report and continue.
        print(f"ℹ️ No previous backup found: {e}")
        print("   → This is normal for first deployment")
    # ── Restore 2: browser binaries (optional, large) ─────────────
    try:
        print("📥 Checking for browser backup...")
        browser_path = hf_hub_download(
            repo_id=repo_id,
            filename=BROWSER_BACKUP,
            repo_type="dataset",
            token=token,
        )
        with tarfile.open(browser_path, "r:gz") as tar:
            tar.extractall(path="/root/.openclaw/")
        print("✅ Browser binaries restored")
    except Exception:
        print("ℹ️ No browser backup found - will install fresh if needed")
def backup():
    """Backup current state to the HF dataset (intended to run hourly).

    Produces two archives:
      1. MAIN_BACKUP    — core data (everything under /root/.openclaw not
         excluded by should_skip()), re-uploaded on every run.
      2. BROWSER_BACKUP — browser binaries, uploaded only the first time.

    No-op when HF_DATASET/HF_TOKEN are not configured or when the data
    directory does not exist yet.
    """
    if not repo_id or not token:
        print("⚠️ Skip Backup: HF_DATASET or HF_TOKEN not configured")
        return
    base_dir = "/root/.openclaw"
    if not os.path.exists(base_dir):
        print(f"⚠️ {base_dir} does not exist - nothing to backup")
        return
    _backup_core(base_dir)
    _backup_browsers(os.path.join(base_dir, "browsers"))
def _backup_core(base_dir):
    """Archive core data under *base_dir* and upload it to the dataset."""
    try:
        print("📦 Creating backup archive...")
        items_backed_up = []
        items_skipped = []
        with tarfile.open(MAIN_BACKUP, "w:gz") as tar:
            for item in os.listdir(base_dir):
                if should_skip(item):
                    items_skipped.append(item)
                    continue
                full_path = os.path.join(base_dir, item)
                # Guard against entries vanishing between listdir and add.
                if os.path.exists(full_path):
                    tar.add(full_path, arcname=item)
                    items_backed_up.append(item)
        print(f"   ✅ Backed up: {', '.join(items_backed_up)}")
        if items_skipped:
            print(f"   ⏭️ Skipped: {', '.join(items_skipped)}")
        print(f"📤 Uploading {MAIN_BACKUP} to {repo_id}...")
        api.upload_file(
            path_or_fileobj=MAIN_BACKUP,
            path_in_repo=MAIN_BACKUP,
            repo_id=repo_id,
            repo_type="dataset",
            token=token,
        )
        print(f"✅ Backup complete - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    except Exception as e:
        # Best-effort: a failed hourly backup must not crash the Space.
        print(f"❌ Backup error: {e}")
def _backup_browsers(browsers_dir):
    """Upload browser binaries once; skip if already present in the dataset."""
    try:
        if not os.path.exists(browsers_dir):
            return
        # Probe the dataset: a successful download means the archive exists.
        try:
            hf_hub_download(
                repo_id=repo_id,
                filename=BROWSER_BACKUP,
                repo_type="dataset",
                token=token,
            )
            print("   Browser backup already exists - skipping upload")
        except Exception:
            # First time - upload browser binaries.
            print("📦 First time: uploading browser binaries (~150MB)...")
            with tarfile.open(BROWSER_BACKUP, "w:gz") as tar:
                tar.add(browsers_dir, arcname="browsers")
            api.upload_file(
                path_or_fileobj=BROWSER_BACKUP,
                path_in_repo=BROWSER_BACKUP,
                repo_id=repo_id,
                repo_type="dataset",
                token=token,
            )
            print("✅ Browser backup uploaded (will not upload again)")
    except Exception as e:
        print(f"⚠️ Browser backup skipped: {e}")
def list_backups():
    """Print the ``.tar.gz`` backup files available in the HF dataset.

    No-op (with a warning) when HF_DATASET/HF_TOKEN are not configured;
    listing errors are reported, not raised.
    """
    if not repo_id or not token:
        print("⚠️ HF_DATASET or HF_TOKEN not configured")
        return
    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset", token=token)
        backup_files = [f for f in files if f.endswith('.tar.gz')]
        if backup_files:
            print(f"\n📋 Available backups in {repo_id}:")
            # Reverse-sorted so the lexicographically newest name is first.
            for f in sorted(backup_files, reverse=True):
                print(f"   - {f}")
        else:
            print(f"📋 No backups found in {repo_id}")
    except Exception as e:
        print(f"❌ Error listing backups: {e}")
if __name__ == "__main__":
    # CLI dispatch: `sync.py backup|list|restore`; no argument means
    # restore (the startup path). Unknown commands are silently ignored,
    # matching the original behavior.
    command = sys.argv[1] if len(sys.argv) > 1 else "restore"
    handlers = {
        "backup": backup,
        "list": list_backups,
        "restore": restore,
    }
    handler = handlers.get(command)
    if handler is not None:
        handler()