Spaces:
Build error
Build error
| #!/usr/bin/env python3 | |
| """ | |
| OpenClaw HF Spaces Persistence — Full Directory Sync | |
| ===================================================== | |
| Simplified persistence: upload/download the entire ~/.openclaw directory | |
| as-is to/from a Hugging Face Dataset repo. | |
| - Startup: snapshot_download → ~/.openclaw | |
| - Periodic: upload_folder → dataset .openclaw/ | |
| - Shutdown: final upload_folder → dataset .openclaw/ | |
| """ | |
| import os | |
| import sys | |
| import time | |
| import threading | |
| import subprocess | |
| import signal | |
| import json | |
| import shutil | |
| import tempfile | |
| import traceback | |
| import re | |
| from pathlib import Path | |
| from datetime import datetime | |
| # Set timeout BEFORE importing huggingface_hub | |
| os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "300") | |
| os.environ.setdefault("HF_HUB_UPLOAD_TIMEOUT", "600") | |
| from huggingface_hub import HfApi, snapshot_download | |
| # ── Logging helper ────────────────────────────────────────────────────────── | |
class TeeLogger:
    """Text stream that duplicates everything written to it into a log file.

    Intended as a drop-in replacement for ``sys.stdout`` / ``sys.stderr``:
    ``write``, ``flush`` and ``fileno`` are implemented here and every other
    attribute (``isatty``, ``encoding``, ...) is delegated to the wrapped
    stream, so code that probes the stream keeps working.

    Args:
        filename: Path of the log file; opened in append mode as UTF-8.
        stream: The underlying text stream to mirror (e.g. ``sys.stdout``).
    """

    def __init__(self, filename, stream):
        self.stream = stream
        self.file = open(filename, "a", encoding="utf-8")

    def write(self, message):
        """Write *message* to both targets and return the number of characters."""
        self.stream.write(message)
        self.file.write(message)
        # Flush eagerly so the log file is current even if the process dies.
        self.flush()
        return len(message)

    def flush(self):
        """Flush the wrapped stream and the log file."""
        self.stream.flush()
        self.file.flush()

    def fileno(self):
        """Return the wrapped stream's file descriptor."""
        return self.stream.fileno()

    def close(self):
        """Close the log file only; the wrapped stream is left open."""
        if not self.file.closed:
            self.file.close()

    def __getattr__(self, name):
        # Only reached for attributes not defined on this object.  Refuse the
        # two instance attributes explicitly to avoid infinite recursion when
        # they have not been set yet.
        if name in ("stream", "file"):
            raise AttributeError(name)
        return getattr(self.stream, name)
| # ── Configuration ─────────────────────────────────────────────────────────── | |
# Hugging Face dataset used as the persistence backend, and its access token.
HF_REPO_ID = os.getenv("OPENCLAW_DATASET_REPO", "tao-shen/openclaw")
HF_TOKEN = os.getenv("HF_TOKEN")

# Local state directory that is mirrored, and the application checkout.
OPENCLAW_HOME = Path.home().joinpath(".openclaw")
APP_DIR = Path("/app/openclaw")

# Folder inside the dataset repo that holds the mirrored ~/.openclaw contents.
DATASET_PATH = ".openclaw"

# Telegram integration settings.
TELEGRAM_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN", "")
TELEGRAM_BOT_NAME = os.getenv("TELEGRAM_BOT_NAME", "opentauronbot")
TELEGRAM_ALLOW_USER = os.getenv("TELEGRAM_ALLOW_USER", "taoshen1")

# Seconds between periodic uploads.
SYNC_INTERVAL = int(os.getenv("SYNC_INTERVAL", "120"))

# Mirror everything printed by this process into ~/.openclaw/workspace/sync.log.
log_dir = OPENCLAW_HOME / "workspace"
log_dir.mkdir(parents=True, exist_ok=True)
sys.stdout = sys.stderr = TeeLogger(log_dir / "sync.log", sys.stdout)
| # ── Sync Manager ──────────────────────────────────────────────────────────── | |
class OpenClawFullSync:
    """Persist ``~/.openclaw`` by mirroring it to a Hugging Face dataset repo.

    The directory is restored once at startup (``load_from_repo``), uploaded
    on a timer (``background_sync_loop``) and uploaded again on shutdown
    (``save_to_repo``).  When ``HF_TOKEN`` is unset, or the dataset repo can
    be neither found nor created, ``enabled`` is ``False`` and restore/save
    become no-ops.
    """

    # Glob patterns left out of every upload (sync.log is 60MB+ and triggers
    # LFS errors).
    _IGNORE_PATTERNS = ("*.log", "*.log.*")

    def __init__(self):
        # Serialises uploads between the background thread and the shutdown
        # path.  Re-entrant so a signal handler that interrupts an upload on
        # the same thread cannot deadlock on it.
        self._upload_lock = threading.RLock()
        if not HF_TOKEN:
            print("[SYNC] WARNING: HF_TOKEN not set. Persistence disabled.")
            self.enabled = False
            return
        self.enabled = True
        self.api = HfApi(token=HF_TOKEN)
        self._ensure_repo()

    # ── Repo management ────────────────────────────────────────────────
    def _ensure_repo(self):
        """Make sure the dataset repo exists; disable persistence if it cannot."""
        try:
            self.api.repo_info(repo_id=HF_REPO_ID, repo_type="dataset")
            print(f"[SYNC] Dataset repo found: {HF_REPO_ID}")
            return
        except Exception:
            # Any lookup failure is treated as "missing"; creation below is
            # idempotent, so a transient lookup error is harmless.
            print(f"[SYNC] Creating dataset repo: {HF_REPO_ID}")
        try:
            self.api.create_repo(
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                private=True,
                exist_ok=True,
            )
        except Exception as e:
            # Do not let a hub/auth outage prevent the application from starting.
            print(f"[SYNC] WARNING: Could not access or create {HF_REPO_ID}: {e}. Persistence disabled.")
            self.enabled = False

    # ── Restore (startup) ──────────────────────────────────────────────
    def load_from_repo(self):
        """Download the dataset's ``DATASET_PATH`` folder into ``~/.openclaw``.

        Existing local files with the same relative path are overwritten.
        Afterwards the configuration is patched and Telegram credentials are
        written, whether or not the download succeeded.
        """
        if not self.enabled:
            return
        print(f"[SYNC] βΆ Restoring ~/.openclaw from dataset {HF_REPO_ID} ...")
        OPENCLAW_HOME.mkdir(parents=True, exist_ok=True)
        try:
            files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
            openclaw_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
            if not openclaw_files:
                print(f"[SYNC] No {DATASET_PATH}/ folder in dataset. Starting fresh.")
                self._ensure_default_config()
                self._ensure_telegram_credentials()
                return
            print(f"[SYNC] Found {len(openclaw_files)} files under {DATASET_PATH}/ in dataset")
            # Download into a scratch directory first, then copy file by file
            # so the dataset prefix is stripped from the destination paths.
            with tempfile.TemporaryDirectory() as tmpdir:
                snapshot_download(
                    repo_id=HF_REPO_ID,
                    repo_type="dataset",
                    allow_patterns=f"{DATASET_PATH}/**",
                    local_dir=tmpdir,
                    token=HF_TOKEN,
                )
                downloaded_root = Path(tmpdir) / DATASET_PATH
                if downloaded_root.exists():
                    for item in downloaded_root.rglob("*"):
                        if item.is_file():
                            rel = item.relative_to(downloaded_root)
                            dest = OPENCLAW_HOME / rel
                            dest.parent.mkdir(parents=True, exist_ok=True)
                            shutil.copy2(str(item), str(dest))
                    print("[SYNC] β Restore completed.")
                else:
                    print("[SYNC] Downloaded snapshot but dir not found. Starting fresh.")
        except Exception as e:
            print(f"[SYNC] β Restore failed: {e}")
            traceback.print_exc()
        # Patch config & telegram after restore
        self._patch_config()
        self._ensure_telegram_credentials()
        self._debug_list_files()

    # ── Save (periodic + shutdown) ─────────────────────────────────────
    @classmethod
    def _is_ignored(cls, rel_path):
        """Return True when *rel_path* matches one of the upload ignore patterns."""
        import fnmatch

        posix_path = rel_path.replace(os.sep, "/")
        return any(fnmatch.fnmatch(posix_path, pat) for pat in cls._IGNORE_PATTERNS)

    def save_to_repo(self):
        """Upload ``~/.openclaw`` to the dataset, skipping ``_IGNORE_PATTERNS``.

        Failures are logged and swallowed so that neither the periodic loop
        nor the shutdown path is interrupted by an upload error.
        """
        if not self.enabled:
            return
        if not OPENCLAW_HOME.exists():
            print("[SYNC] ~/.openclaw does not exist, nothing to save.")
            return
        # Only one upload at a time: the periodic thread and the final sync
        # would otherwise race each other.
        with self._upload_lock:
            print(f"[SYNC] βΆ Uploading ~/.openclaw β dataset {HF_REPO_ID}/{DATASET_PATH}/ ...")
            try:
                # Log what will be uploaded
                total_size = 0
                file_count = 0
                for root, _dirs, names in os.walk(OPENCLAW_HOME):
                    for name in names:
                        full_path = os.path.join(root, name)
                        rel = os.path.relpath(full_path, OPENCLAW_HOME)
                        if self._is_ignored(rel):
                            # Excluded by ignore_patterns below; do not report it.
                            continue
                        try:
                            size = os.path.getsize(full_path)
                        except OSError:
                            # File vanished (or is unreadable) since the walk.
                            continue
                        total_size += size
                        file_count += 1
                        print(f"[SYNC] uploading: {rel} ({size} bytes)")
                print(f"[SYNC] Uploading: {file_count} files, {total_size} bytes total")
                if file_count == 0:
                    print("[SYNC] Nothing to upload.")
                    return
                # Upload directory, excluding large log files that trigger LFS errors
                self.api.upload_folder(
                    folder_path=str(OPENCLAW_HOME),
                    path_in_repo=DATASET_PATH,
                    repo_id=HF_REPO_ID,
                    repo_type="dataset",
                    token=HF_TOKEN,
                    commit_message=f"Sync .openclaw β {datetime.now().isoformat()}",
                    ignore_patterns=list(self._IGNORE_PATTERNS),
                )
                print(f"[SYNC] β Upload completed at {datetime.now().isoformat()}")
                # Verify (best effort; a failure here does not undo the upload)
                try:
                    files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
                    oc_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
                    print(f"[SYNC] Dataset now has {len(oc_files)} files under {DATASET_PATH}/")
                    for f in oc_files[:30]:
                        print(f"[SYNC] {f}")
                    if len(oc_files) > 30:
                        print(f"[SYNC] ... and {len(oc_files) - 30} more")
                except Exception as e:
                    print(f"[SYNC] Post-upload verification skipped: {e}")
            except Exception as e:
                print(f"[SYNC] β Upload failed: {e}")
                traceback.print_exc()

    # ── Config helpers ─────────────────────────────────────────────────
    def _ensure_default_config(self):
        """Create ``openclaw.json`` if missing, from the template or a minimal default."""
        config_path = OPENCLAW_HOME / "openclaw.json"
        if config_path.exists():
            return
        default_src = Path(__file__).parent / "openclaw.json.default"
        if default_src.exists():
            shutil.copy2(str(default_src), str(config_path))
            print("[SYNC] Created openclaw.json from default template")
            return
        minimal = {
            "gateway": {
                "mode": "local", "bind": "lan", "port": 7860,
                "trustedProxies": ["0.0.0.0/0"],
                "controlUi": {
                    "allowInsecureAuth": True,
                    "allowedOrigins": [
                        "https://tao-shen-openclaw-ai.hf.space",
                        "https://huggingface.co",
                    ],
                },
            },
            "session": {"scope": "global"},
            "models": {"mode": "merge", "providers": {}},
            "agents": {"defaults": {"workspace": "~/.openclaw/workspace"}},
        }
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(minimal, f)
        print("[SYNC] Created minimal openclaw.json")

    def _patch_config(self):
        """Force the settings this deployment relies on into ``openclaw.json``.

        An unreadable config (invalid JSON, or a top-level value that is not
        an object) is backed up next to the original and replaced by a fresh
        one.  Gateway, session scope, the OpenRouter provider, the primary
        model and the Telegram plugin entry are always overwritten/enabled.
        """
        config_path = OPENCLAW_HOME / "openclaw.json"
        if not config_path.exists():
            self._ensure_default_config()
            return
        print("[SYNC] Patching configuration...")
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            if not isinstance(data, dict):
                # Valid JSON but unusable: the patching below needs an object.
                raise ValueError("top-level JSON value is not an object")
            print("[SYNC] Config parsed OK.")
        except (OSError, ValueError) as e:
            # Config is corrupt: back up and start fresh
            # (json.JSONDecodeError and UnicodeDecodeError are ValueErrors).
            print(f"[SYNC] Config JSON is corrupt: {e}")
            backup = config_path.with_suffix(f".corrupt_{int(time.time())}")
            try:
                shutil.copy2(config_path, backup)
                print(f"[SYNC] Backed up corrupt config to {backup.name}")
            except OSError as backup_err:
                print(f"[SYNC] Could not back up corrupt config: {backup_err}")
            data = {}
            print("[SYNC] Starting from clean config.")
        try:
            # Remove /dev/null from plugins.locations
            plugins = data.get("plugins")
            if isinstance(plugins, dict):
                locs = plugins.get("locations", [])
                if isinstance(locs, list) and "/dev/null" in locs:
                    plugins["locations"] = [loc for loc in locs if loc != "/dev/null"]
            # Force full gateway config for HF Spaces
            # Note: Dockerfile injects "openclaw-space-default" token into Control UI,
            # so we MUST set it here to match what the browser sends.
            data["gateway"] = {
                "mode": "local",
                "bind": "lan",
                "port": 7860,
                "auth": {"token": "openclaw-space-default"},
                "trustedProxies": ["0.0.0.0/0"],
                "controlUi": {
                    "allowInsecureAuth": True,
                    "allowedOrigins": [
                        "https://tao-shen-openclaw-ai.hf.space",
                        "https://huggingface.co",
                    ],
                },
            }
            print("[SYNC] Set gateway config (auth=default, trustedProxies=all)")
            # Ensure agents defaults
            data.setdefault("agents", {}).setdefault("defaults", {}).setdefault("model", {})
            data.setdefault("session", {})["scope"] = "global"
            # Force OpenRouter provider
            providers = data.setdefault("models", {}).setdefault("providers", {})
            # SECURITY: prefer the OPENROUTER_API_KEY environment variable.  The
            # embedded key is kept only so existing deployments keep working;
            # it is exposed in source and should be rotated and removed.
            api_key = os.environ.get("OPENROUTER_API_KEY") or (
                "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"
            )
            providers["openrouter"] = {
                "baseUrl": "https://openrouter.ai/api/v1",
                "apiKey": api_key,
                "api": "openai-completions",
                "models": [
                    {"id": "stepfun/step-3.5-flash:free", "name": "Step-3.5-Flash (Free)"},
                    {"id": "deepseek/deepseek-chat:free", "name": "DeepSeek V3 (Free)"},
                ],
            }
            # Remove old gemini provider if present
            providers.pop("gemini", None)
            data["agents"]["defaults"]["model"]["primary"] = "openrouter/stepfun/step-3.5-flash:free"
            # Telegram plugin
            entries = data.setdefault("plugins", {}).setdefault("entries", {})
            if "telegram" not in entries:
                entries["telegram"] = {"enabled": True}
            elif isinstance(entries["telegram"], dict):
                entries["telegram"]["enabled"] = True
            with open(config_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2)
            print("[SYNC] Config patched and saved.")
            # Verify write
            with open(config_path, "r", encoding="utf-8") as f:
                verify_data = json.load(f)
            gw = verify_data.get("gateway", {})
            provider_names = list(verify_data.get("models", {}).get("providers", {}).keys())
            primary = verify_data.get("agents", {}).get("defaults", {}).get("model", {}).get("primary")
            print(f"[SYNC] VERIFY: gateway.port={gw.get('port')}, providers={provider_names}, primary={primary}")
        except Exception as e:
            print(f"[SYNC] Failed to patch config: {e}")
            traceback.print_exc()

    def _ensure_telegram_credentials(self):
        """Write the Telegram bot token file and make sure the allow-list has the user."""
        creds_dir = OPENCLAW_HOME / "credentials"
        creds_dir.mkdir(parents=True, exist_ok=True)
        if TELEGRAM_BOT_TOKEN:
            bot_file = creds_dir / "telegram-bot-token.json"
            with open(bot_file, "w", encoding="utf-8") as f:
                json.dump({"token": TELEGRAM_BOT_TOKEN, "bot": TELEGRAM_BOT_NAME}, f, indent=2)
            print(f"[SYNC] Telegram bot configured: {TELEGRAM_BOT_NAME}")
        allow_file = creds_dir / "telegram-allowFrom.json"
        if not allow_file.exists():
            with open(allow_file, "w", encoding="utf-8") as f:
                json.dump([TELEGRAM_ALLOW_USER], f, indent=2)
            print(f"[SYNC] Created telegram-allowFrom.json for {TELEGRAM_ALLOW_USER}")
            return
        try:
            with open(allow_file, "r", encoding="utf-8") as f:
                allowed = json.load(f)
            if not isinstance(allowed, list):
                allowed = [TELEGRAM_ALLOW_USER]
            elif TELEGRAM_ALLOW_USER not in allowed:
                allowed.append(TELEGRAM_ALLOW_USER)
            with open(allow_file, "w", encoding="utf-8") as f:
                json.dump(allowed, f, indent=2)
        except Exception:
            # Unreadable allow-list: reset it to just the configured user.
            with open(allow_file, "w", encoding="utf-8") as f:
                json.dump([TELEGRAM_ALLOW_USER], f, indent=2)

    def _debug_list_files(self):
        """Print the local tree (first 51 files), skipping cache/vendor directories."""
        print("[SYNC] Local ~/.openclaw tree:")
        skipped_dirs = {".cache", "node_modules", "__pycache__"}
        try:
            count = 0
            for root, dirs, files in os.walk(OPENCLAW_HOME):
                # Prune in place so os.walk does not descend into these.
                dirs[:] = [d for d in dirs if d not in skipped_dirs]
                for name in sorted(files):
                    rel = os.path.relpath(os.path.join(root, name), OPENCLAW_HOME)
                    print(f"[SYNC] {rel}")
                    count += 1
                    if count > 50:
                        print("[SYNC] ... (truncated)")
                        return
        except Exception as e:
            print(f"[SYNC] listing failed: {e}")

    # ── Background sync loop ───────────────────────────────────────────
    def background_sync_loop(self, stop_event):
        """Upload every ``SYNC_INTERVAL`` seconds until *stop_event* is set."""
        print(f"[SYNC] Background sync started (interval={SYNC_INTERVAL}s)")
        # wait() returns True as soon as the event is set, which ends the loop
        # without a final upload; the caller performs that itself.
        while not stop_event.wait(timeout=SYNC_INTERVAL):
            print(f"[SYNC] ββ Periodic sync triggered at {datetime.now().isoformat()} ββ")
            self.save_to_repo()

    # ── Application runner ─────────────────────────────────────────────
    def run_openclaw(self):
        """Start the gateway under a shell, teeing its output to ``startup.log``.

        Returns:
            The ``subprocess.Popen`` handle of the shell running the pipeline.
        """
        import shlex

        log_file = OPENCLAW_HOME / "workspace" / "startup.log"
        log_file.parent.mkdir(parents=True, exist_ok=True)
        # Quote the path: it is derived from the home directory and goes
        # through a shell.
        # NOTE(review): the pipeline's exit status is that of `tee`, not `node`.
        cmd = f"node dist/entry.js gateway 2>&1 | tee -a {shlex.quote(str(log_file))}"
        print(f"[SYNC] Launching: {cmd}")
        return subprocess.Popen(cmd, shell=True, cwd=str(APP_DIR),
                                stdout=sys.stdout, stderr=sys.stderr)
| # ── Main ──────────────────────────────────────────────────────────────────── | |
def main():
    """Restore state, start periodic sync, run the gateway, and sync on exit.

    Exits with the child's return code when it stops on its own, or with 0
    after a SIGINT/SIGTERM-triggered shutdown.
    """
    manager = OpenClawFullSync()

    # 1. Restore
    manager.load_from_repo()

    # 2. Background sync
    halt = threading.Event()
    worker = threading.Thread(
        target=manager.background_sync_loop,
        args=(halt,),
        daemon=True,
    )
    worker.start()

    # 3. Start application
    child = manager.run_openclaw()

    def final_sync():
        print("[SYNC] Final sync...")
        manager.save_to_repo()

    def on_signal(signum, _frame):
        print(f"\n[SYNC] Signal {signum} received. Shutting down...")
        halt.set()
        if child:
            # Ask politely first; force-kill if it has not exited within 5s.
            child.terminate()
            try:
                child.wait(timeout=5)
            except subprocess.TimeoutExpired:
                child.kill()
        final_sync()
        sys.exit(0)

    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, on_signal)

    # Block until the application exits by itself.
    status = child.wait()
    print(f"[SYNC] OpenClaw exited with code {status}")
    halt.set()
    final_sync()
    sys.exit(status)


if __name__ == "__main__":
    main()