tao-shen committed on
Commit
e2bd30e
·
verified ·
1 Parent(s): 03b9c12

Upload scripts/sync_hf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. scripts/sync_hf.py +45 -774
scripts/sync_hf.py CHANGED
@@ -1,783 +1,54 @@
1
  #!/usr/bin/env python3
2
- """
3
- OpenClaw HF Spaces Persistence — Full Directory Sync
4
- =====================================================
5
-
6
- Simplified persistence: upload/download the entire ~/.openclaw directory
7
- as-is to/from a Hugging Face Dataset repo.
8
-
9
- - Startup: snapshot_download → ~/.openclaw
10
- - Periodic: upload_folder → dataset openclaw_data/
11
- - Shutdown: final upload_folder → dataset openclaw_data/
12
- """
13
-
14
- import os
15
  import sys
16
- import time
17
- import threading
18
- import subprocess
19
- import signal
20
  import json
21
- import shutil
22
- import tempfile
23
- import traceback
24
- import re
25
- import urllib.request
26
- import ssl
27
- from pathlib import Path
28
- from datetime import datetime
29
- # Set timeout BEFORE importing huggingface_hub
30
- os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "300")
31
- os.environ.setdefault("HF_HUB_UPLOAD_TIMEOUT", "600")
32
- # Suppress huggingface_hub progress bars and verbose download/upload logs
33
- os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
34
- os.environ.setdefault("HF_HUB_VERBOSITY", "warning")
35
-
36
- import logging as _logging
37
- _logging.getLogger("huggingface_hub").setLevel(_logging.WARNING)
38
- _logging.getLogger("huggingface_hub.utils").setLevel(_logging.WARNING)
39
- _logging.getLogger("filelock").setLevel(_logging.WARNING)
40
-
41
- from huggingface_hub import HfApi, snapshot_download
42
-
43
- # ── Logging helper ──────────────────────────────────────────────────────────
44
-
45
- class TeeLogger:
46
- """Duplicate output to stream and file."""
47
- def __init__(self, filename, stream):
48
- self.stream = stream
49
- self.file = open(filename, "a", encoding="utf-8")
50
- def write(self, message):
51
- self.stream.write(message)
52
- self.file.write(message)
53
- self.flush()
54
- def flush(self):
55
- self.stream.flush()
56
- self.file.flush()
57
- def fileno(self):
58
- return self.stream.fileno()
59
-
60
- # ── Configuration ───────────────────────────────────────────────────────────
61
-
62
- HF_TOKEN = os.environ.get("HF_TOKEN")
63
- OPENCLAW_HOME = Path.home() / ".openclaw"
64
- APP_DIR = Path("/app/openclaw")
65
-
66
- # Use ".openclaw" - directly read/write the .openclaw folder in dataset
67
- DATASET_PATH = ".openclaw"
68
-
69
- # OpenAI-compatible API (OpenAI, OpenRouter, or any compatible endpoint)
70
- OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
71
- OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1").rstrip("/")
72
-
73
- # OpenRouter API key (optional; alternative to OPENAI_API_KEY + OPENAI_BASE_URL)
74
- OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
75
-
76
- # Zhipu AI (z.ai) API key (optional; GLM-4 series, Anthropic-compatible endpoint)
77
- ZHIPU_API_KEY = os.environ.get("ZHIPU_API_KEY", "")
78
-
79
- # Gateway token (default: huggingclaw; override via GATEWAY_TOKEN env var)
80
- GATEWAY_TOKEN = os.environ.get("GATEWAY_TOKEN", "huggingclaw")
81
-
82
- # A2A configuration (optional; only activated when A2A_PEERS is set)
83
- AGENT_NAME = os.environ.get("AGENT_NAME", "HuggingClaw")
84
- A2A_PEERS = os.environ.get("A2A_PEERS", "") # comma-separated peer URLs
85
-
86
- # Default model for new conversations (infer from provider if not set)
87
- OPENCLAW_DEFAULT_MODEL = os.environ.get("OPENCLAW_DEFAULT_MODEL") or (
88
- "openai/gpt-5-nano" if OPENAI_API_KEY
89
- else "zhipu/glm-4.5-air" if ZHIPU_API_KEY
90
- else "openrouter/openai/gpt-oss-20b:free"
91
- )
92
-
93
- # HF Spaces built-in env vars (auto-set by HF runtime)
94
- SPACE_HOST = os.environ.get("SPACE_HOST", "") # e.g. "tao-shen-huggingclaw.hf.space"
95
- SPACE_ID = os.environ.get("SPACE_ID", "") # e.g. "tao-shen/HuggingClaw"
96
-
97
- SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
98
- AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")
99
 
100
- # Dataset repo: always auto-derive from SPACE_ID when not explicitly set.
101
- # Format: {username}/{SpaceName}-data (e.g. "your-name/YourSpace-data")
102
- # This ensures each duplicated Space gets its own dataset automatically.
103
- HF_REPO_ID = os.environ.get("OPENCLAW_DATASET_REPO", "")
104
- if not HF_REPO_ID and SPACE_ID:
105
- # SPACE_ID = "username/SpaceName" → derive "username/SpaceName-data"
106
- HF_REPO_ID = f"{SPACE_ID}-data"
107
- print(f"[SYNC] OPENCLAW_DATASET_REPO not set — auto-derived from SPACE_ID: {HF_REPO_ID}")
108
- elif not HF_REPO_ID and HF_TOKEN:
109
- # Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  try:
111
- _api = HfApi(token=HF_TOKEN)
112
- _username = _api.whoami()["name"]
113
- HF_REPO_ID = f"{_username}/HuggingClaw-data"
114
- print(f"[SYNC] OPENCLAW_DATASET_REPO not set — auto-derived from HF_TOKEN: {HF_REPO_ID}")
115
- del _api, _username
116
- except Exception as e:
117
- print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
118
- HF_REPO_ID = ""
119
-
120
- # Setup logging
121
- log_dir = OPENCLAW_HOME / "workspace"
122
- log_dir.mkdir(parents=True, exist_ok=True)
123
- sys.stdout = TeeLogger(log_dir / "sync.log", sys.stdout)
124
- sys.stderr = sys.stdout
125
-
126
- # ── Telegram API Base Auto-Probe ────────────────────────────────────────────
127
- #
128
- # HF Spaces blocks DNS for api.telegram.org. grammY uses Node 22's built-in
129
- # fetch (undici) which bypasses dns.lookup patching and /etc/hosts.
130
- #
131
- # Solution: probe multiple Telegram API endpoints at startup. If the official
132
- # endpoint is unreachable, pick the first working mirror. Then:
133
- # 1. Set TELEGRAM_API_ROOT env var for the Node process
134
- # 2. telegram-proxy.cjs (loaded via NODE_OPTIONS --require) intercepts
135
- # globalThis.fetch() and rewrites api.telegram.org URLs to the mirror.
136
- #
137
- # This works without a bot token — we just test HTTP reachability.
138
- # If a bot token IS available, we do a full getMe verification.
139
-
140
- # User can force a specific base via env var (skip auto-probe)
141
- TELEGRAM_API_BASE = os.environ.get("TELEGRAM_API_BASE", "")
142
-
143
- TELEGRAM_API_BASES = [
144
- "https://api.telegram.org", # official
145
- "https://telegram-api.mykdigi.com", # known mirror
146
- "https://telegram-api-proxy-anonymous.pages.dev/api", # Cloudflare Pages proxy
147
- ]
148
-
149
-
150
- def probe_telegram_api(timeout: int = 8) -> str:
151
- """Probe Telegram API endpoints and return the first reachable one.
152
-
153
- First checks if official api.telegram.org is reachable (HTTP level).
154
- If not, tries mirrors. No bot token required — just tests connectivity.
155
- Returns the working base URL (without trailing slash), or "" if all fail.
156
- """
157
- ctx = ssl.create_default_context()
158
- for base in TELEGRAM_API_BASES:
159
- url = base.rstrip("/") + "/"
160
- try:
161
- req = urllib.request.Request(url, method="GET")
162
- resp = urllib.request.urlopen(req, timeout=timeout, context=ctx)
163
- print(f"[TELEGRAM] ✓ Reachable: {base} (HTTP {resp.status})")
164
- return base.rstrip("/")
165
- except urllib.error.HTTPError as e:
166
- # HTTP error (4xx/5xx) still means the host IS reachable
167
- print(f"[TELEGRAM] ✓ Reachable: {base} (HTTP {e.code})")
168
- return base.rstrip("/")
169
- except Exception as e:
170
- reason = str(e)[:80]
171
- print(f"[TELEGRAM] ✗ Unreachable: {base} ({reason})")
172
- continue
173
-
174
- print("[TELEGRAM] WARNING: All API endpoints unreachable!")
175
- return ""
176
-
177
-
178
- # ── Sync Manager ────────────────────────────────────────────────────────────
179
-
180
- class OpenClawFullSync:
181
- """Upload/download the entire ~/.openclaw directory to HF Dataset."""
182
-
183
- def __init__(self):
184
- self.enabled = False
185
- self.dataset_exists = False
186
- self.api = None
187
-
188
- if not HF_TOKEN:
189
- print("[SYNC] WARNING: HF_TOKEN not set. Persistence disabled.")
190
- return
191
- if not HF_REPO_ID:
192
- print("[SYNC] WARNING: Could not determine dataset repo (no SPACE_ID or OPENCLAW_DATASET_REPO).")
193
- print("[SYNC] Persistence disabled.")
194
- return
195
-
196
- self.enabled = True
197
- self.api = HfApi(token=HF_TOKEN)
198
- self.dataset_exists = self._ensure_repo_exists()
199
-
200
- # ── Repo management ────────────────────────────────────────────────
201
-
202
- def _ensure_repo_exists(self):
203
- """Check if dataset repo exists; auto-create only when AUTO_CREATE_DATASET=true AND HF_TOKEN is set."""
204
- try:
205
- self.api.repo_info(repo_id=HF_REPO_ID, repo_type="dataset")
206
- print(f"[SYNC] Dataset repo found: {HF_REPO_ID}")
207
- return True
208
- except Exception:
209
- if not AUTO_CREATE_DATASET:
210
- print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID}")
211
- print(f"[SYNC] Set AUTO_CREATE_DATASET=true to auto-create.")
212
- print(f"[SYNC] Persistence disabled (app will still run normally).")
213
- return False
214
- print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID} — creating...")
215
- try:
216
- self.api.create_repo(
217
- repo_id=HF_REPO_ID,
218
- repo_type="dataset",
219
- private=True,
220
- )
221
- print(f"[SYNC] ✓ Dataset repo created: {HF_REPO_ID}")
222
- return True
223
- except Exception as e:
224
- print(f"[SYNC] ✗ Failed to create dataset repo: {e}")
225
- return False
226
-
227
- # ── Restore (startup) ─────────────────────────────────────────────
228
-
229
- def load_from_repo(self):
230
- """Download from dataset → ~/.openclaw"""
231
- if not self.enabled:
232
- print("[SYNC] Persistence disabled - skipping restore")
233
- self._ensure_default_config()
234
- self._patch_config()
235
- return
236
-
237
- if not self.dataset_exists:
238
- print(f"[SYNC] Dataset {HF_REPO_ID} does not exist - starting fresh")
239
- self._ensure_default_config()
240
- self._patch_config()
241
- return
242
-
243
- print(f"[SYNC] ▶ Restoring ~/.openclaw from dataset {HF_REPO_ID} ...")
244
- OPENCLAW_HOME.mkdir(parents=True, exist_ok=True)
245
-
246
- try:
247
- files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
248
- openclaw_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
249
- if not openclaw_files:
250
- print(f"[SYNC] No {DATASET_PATH}/ folder in dataset. Starting fresh.")
251
- self._ensure_default_config()
252
- self._patch_config()
253
- return
254
-
255
- print(f"[SYNC] Found {len(openclaw_files)} files under {DATASET_PATH}/ in dataset")
256
-
257
- with tempfile.TemporaryDirectory() as tmpdir:
258
- snapshot_download(
259
- repo_id=HF_REPO_ID,
260
- repo_type="dataset",
261
- allow_patterns=f"{DATASET_PATH}/**",
262
- local_dir=tmpdir,
263
- token=HF_TOKEN,
264
- )
265
- downloaded_root = Path(tmpdir) / DATASET_PATH
266
- if downloaded_root.exists():
267
- for item in downloaded_root.rglob("*"):
268
- if item.is_file():
269
- rel = item.relative_to(downloaded_root)
270
- dest = OPENCLAW_HOME / rel
271
- dest.parent.mkdir(parents=True, exist_ok=True)
272
- shutil.copy2(str(item), str(dest))
273
- print("[SYNC] ✓ Restore completed.")
274
- else:
275
- print("[SYNC] Downloaded snapshot but dir not found. Starting fresh.")
276
-
277
- except Exception as e:
278
- print(f"[SYNC] ✗ Restore failed: {e}")
279
- traceback.print_exc()
280
-
281
- # Patch config after restore
282
- self._patch_config()
283
- self._debug_list_files()
284
-
285
- # ── Save (periodic + shutdown) ─────────────────────────────────────
286
-
287
- def save_to_repo(self):
288
- """Upload entire ~/.openclaw directory → dataset (all files, no filtering)"""
289
- if not self.enabled:
290
- return
291
- if not OPENCLAW_HOME.exists():
292
- print("[SYNC] ~/.openclaw does not exist, nothing to save.")
293
- return
294
-
295
- # Ensure dataset exists (auto-create if needed)
296
- if not self._ensure_repo_exists():
297
- print(f"[SYNC] Dataset {HF_REPO_ID} unavailable - skipping save")
298
- return
299
-
300
- print(f"[SYNC] ▶ Uploading ~/.openclaw → dataset {HF_REPO_ID}/{DATASET_PATH}/ ...")
301
-
302
- try:
303
- # Count files to upload (no per-file logging to reduce noise)
304
- total_size = 0
305
- file_count = 0
306
- for root, dirs, fls in os.walk(OPENCLAW_HOME):
307
- for fn in fls:
308
- fp = os.path.join(root, fn)
309
- total_size += os.path.getsize(fp)
310
- file_count += 1
311
- print(f"[SYNC] Uploading: {file_count} files, {total_size} bytes total")
312
-
313
- if file_count == 0:
314
- print("[SYNC] Nothing to upload.")
315
- return
316
-
317
- # Upload directory, excluding large log files that trigger LFS rejection
318
- self.api.upload_folder(
319
- folder_path=str(OPENCLAW_HOME),
320
- path_in_repo=DATASET_PATH,
321
- repo_id=HF_REPO_ID,
322
- repo_type="dataset",
323
- token=HF_TOKEN,
324
- commit_message=f"Sync .openclaw — {datetime.now().isoformat()}",
325
- ignore_patterns=[
326
- "*.log", # Log files (sync.log, startup.log) — regenerated on boot
327
- "*.lock", # Lock files — stale after restart
328
- "*.tmp", # Temp files
329
- "*.pid", # PID files
330
- "__pycache__", # Python cache
331
- ],
332
- )
333
- print(f"[SYNC] ✓ Upload completed at {datetime.now().isoformat()}")
334
-
335
- # Verify (summary only)
336
- try:
337
- files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
338
- oc_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
339
- print(f"[SYNC] Dataset now has {len(oc_files)} files under {DATASET_PATH}/")
340
- except Exception:
341
- pass
342
-
343
- except Exception as e:
344
- print(f"[SYNC] ✗ Upload failed: {e}")
345
- traceback.print_exc()
346
-
347
- # ── Config helpers ─────────────────────────────────────────────────
348
-
349
- def _ensure_default_config(self):
350
- config_path = OPENCLAW_HOME / "openclaw.json"
351
- if config_path.exists():
352
- return
353
- default_src = Path(__file__).parent / "openclaw.json.default"
354
- if default_src.exists():
355
- shutil.copy2(str(default_src), str(config_path))
356
- # Replace placeholder or remove provider if no API key
357
- try:
358
- with open(config_path, "r") as f:
359
- cfg = json.load(f)
360
- # Set gateway token
361
- if "gateway" in cfg:
362
- cfg["gateway"]["auth"] = {"token": GATEWAY_TOKEN}
363
- if OPENAI_API_KEY and "models" in cfg and "providers" in cfg["models"] and "openai" in cfg["models"]["providers"]:
364
- cfg["models"]["providers"]["openai"]["apiKey"] = OPENAI_API_KEY
365
- if OPENAI_BASE_URL:
366
- cfg["models"]["providers"]["openai"]["baseUrl"] = OPENAI_BASE_URL
367
- elif "models" in cfg and "providers" in cfg["models"]:
368
- if not OPENAI_API_KEY:
369
- cfg["models"]["providers"].pop("openai", None)
370
- if OPENROUTER_API_KEY:
371
- if "models" in cfg and "providers" in cfg["models"] and "openrouter" in cfg["models"]["providers"]:
372
- cfg["models"]["providers"]["openrouter"]["apiKey"] = OPENROUTER_API_KEY
373
- else:
374
- if "models" in cfg and "providers" in cfg["models"]:
375
- cfg["models"]["providers"].pop("openrouter", None)
376
- print("[SYNC] No OPENROUTER_API_KEY — removed openrouter provider from config")
377
- with open(config_path, "w") as f:
378
- json.dump(cfg, f, indent=2)
379
- except Exception as e:
380
- print(f"[SYNC] Warning: failed to patch default config: {e}")
381
- print("[SYNC] Created openclaw.json from default template")
382
- else:
383
- with open(config_path, "w") as f:
384
- json.dump({
385
- "gateway": {
386
- "mode": "local", "bind": "lan", "port": 7860,
387
- "trustedProxies": ["0.0.0.0/0"],
388
- "controlUi": {
389
- "allowInsecureAuth": True,
390
- "allowedOrigins": [
391
- "https://huggingface.co"
392
- ]
393
- }
394
- },
395
- "session": {"scope": "global"},
396
- "models": {"mode": "merge", "providers": {}},
397
- "agents": {"defaults": {"workspace": "~/.openclaw/workspace"}}
398
- }, f)
399
- print("[SYNC] Created minimal openclaw.json")
400
-
401
- def _patch_config(self):
402
- """Ensure critical settings after restore."""
403
- config_path = OPENCLAW_HOME / "openclaw.json"
404
- if not config_path.exists():
405
- self._ensure_default_config()
406
- return
407
-
408
- print("[SYNC] Patching configuration...")
409
- try:
410
- with open(config_path, "r") as f:
411
- data = json.load(f)
412
- print("[SYNC] Config parsed OK.")
413
- except (json.JSONDecodeError, Exception) as e:
414
- # Config is corrupt — back up and start fresh
415
- print(f"[SYNC] Config JSON is corrupt: {e}")
416
- backup = config_path.with_suffix(f".corrupt_{int(time.time())}")
417
- try:
418
- shutil.copy2(config_path, backup)
419
- print(f"[SYNC] Backed up corrupt config to {backup.name}")
420
- except Exception:
421
- pass
422
- data = {}
423
- print("[SYNC] Starting from clean config.")
424
-
425
- try:
426
- # Remove /dev/null from plugins.locations
427
- if "plugins" in data and isinstance(data.get("plugins"), dict):
428
- locs = data["plugins"].get("locations", [])
429
- if isinstance(locs, list) and "/dev/null" in locs:
430
- data["plugins"]["locations"] = [l for l in locs if l != "/dev/null"]
431
-
432
- # Clean up invalid config keys that crash OpenClaw
433
- if "auth" in data and isinstance(data.get("auth"), dict):
434
- data["auth"].pop("defaultScope", None)
435
- if not data["auth"]:
436
- del data["auth"]
437
- if "gateway" in data and isinstance(data.get("gateway"), dict):
438
- auth = data["gateway"].get("auth", {})
439
- if isinstance(auth, dict):
440
- auth.pop("scope", None)
441
-
442
- # Force full gateway config for HF Spaces
443
- # Dynamic allowedOrigins from SPACE_HOST (auto-set by HF runtime)
444
- allowed_origins = [
445
- "https://huggingface.co",
446
- "https://*.hf.space",
447
- ]
448
- if SPACE_HOST:
449
- allowed_origins.append(f"https://{SPACE_HOST}")
450
- print(f"[SYNC] SPACE_HOST detected: {SPACE_HOST}")
451
- data["gateway"] = {
452
- "mode": "local",
453
- "bind": "lan",
454
- "port": 7860,
455
- "auth": {"token": GATEWAY_TOKEN},
456
- "trustedProxies": ["0.0.0.0/0"],
457
- "controlUi": {
458
- "allowInsecureAuth": True,
459
- "dangerouslyDisableDeviceAuth": True,
460
- "allowedOrigins": allowed_origins
461
- }
462
  }
463
- print(f"[SYNC] Set gateway config (port=7860, auth=token, origins={len(allowed_origins)})")
464
-
465
- # Ensure agents defaults
466
- data.setdefault("agents", {}).setdefault("defaults", {}).setdefault("model", {})
467
- data.setdefault("session", {})["scope"] = "global"
468
-
469
- # Build providers from scratch — only include providers with active API keys.
470
- # This ensures removed secrets don't leave stale providers from backup.
471
- providers = {}
472
- if OPENAI_API_KEY:
473
- providers["openai"] = {
474
- "baseUrl": OPENAI_BASE_URL,
475
- "apiKey": OPENAI_API_KEY,
476
- "api": "openai-completions",
477
- }
478
- print(f"[SYNC] Set OpenAI-compatible provider (baseUrl={OPENAI_BASE_URL})")
479
- if OPENROUTER_API_KEY:
480
- providers["openrouter"] = {
481
- "baseUrl": "https://openrouter.ai/api/v1",
482
- "apiKey": OPENROUTER_API_KEY,
483
- "api": "openai-completions",
484
- "models": [
485
- {"id": "openai/gpt-oss-20b:free", "name": "GPT-OSS-20B (Free)"},
486
- {"id": "deepseek/deepseek-chat:free", "name": "DeepSeek V3 (Free)"}
487
- ]
488
- }
489
- print("[SYNC] Set OpenRouter provider")
490
- if ZHIPU_API_KEY:
491
- providers["zhipu"] = {
492
- "baseUrl": "https://open.bigmodel.cn/api/anthropic",
493
- "apiKey": ZHIPU_API_KEY,
494
- "api": "anthropic-messages",
495
- "models": [
496
- {"id": "glm-4.5-air", "name": "GLM-4.5 Air"},
497
- {"id": "glm-4.5", "name": "GLM-4.5"},
498
- {"id": "glm-4.6", "name": "GLM-4.6"},
499
- {"id": "glm-4.7", "name": "GLM-4.7"},
500
- ]
501
- }
502
- print("[SYNC] Set Zhipu AI provider")
503
- if not providers:
504
- print("[SYNC] WARNING: No API key set (OPENAI/OPENROUTER/ZHIPU), LLM features may not work")
505
- data.setdefault("models", {})["providers"] = providers
506
- data["agents"]["defaults"]["model"]["primary"] = OPENCLAW_DEFAULT_MODEL
507
-
508
- # Plugin whitelist
509
- data.setdefault("plugins", {}).setdefault("entries", {})
510
- plugin_allow = ["telegram", "whatsapp"]
511
- if A2A_PEERS:
512
- plugin_allow.append("a2a-gateway")
513
- data["plugins"]["allow"] = plugin_allow
514
- if "telegram" not in data["plugins"]["entries"]:
515
- data["plugins"]["entries"]["telegram"] = {"enabled": True}
516
- elif isinstance(data["plugins"]["entries"]["telegram"], dict):
517
- data["plugins"]["entries"]["telegram"]["enabled"] = True
518
-
519
- # ── A2A Gateway Plugin Configuration (only if A2A_PEERS is set) ──
520
- if A2A_PEERS:
521
- peers = []
522
- for peer_url in A2A_PEERS.split(","):
523
- peer_url = peer_url.strip()
524
- if not peer_url:
525
- continue
526
- name = peer_url.split("//")[-1].split(".")[0].split("-")[-1].capitalize()
527
- peers.append({
528
- "name": name,
529
- "agentCardUrl": f"{peer_url}/.well-known/agent-card.json"
530
- })
531
- print(f"[SYNC] A2A peer: {name} → {peer_url}")
532
-
533
- data["plugins"]["entries"]["a2a-gateway"] = {
534
- "enabled": True,
535
- "config": {
536
- "agentCard": {
537
- "name": AGENT_NAME,
538
- "description": f"{AGENT_NAME} - HuggingClaw A2A Agent",
539
- "skills": [{"id": "chat", "name": "chat", "description": "Chat bridge"}]
540
- },
541
- "server": {"host": "0.0.0.0", "port": 18800},
542
- "security": {"inboundAuth": "none"},
543
- "routing": {"defaultAgentId": "main"},
544
- "peers": peers
545
- }
546
- }
547
- print(f"[SYNC] A2A gateway configured: name={AGENT_NAME}, port=18800, peers={len(peers)}")
548
-
549
- # ── Telegram channel defaults (open DM policy for HF Spaces) ──
550
- # Personal bot on HF Spaces — no need for strict pairing.
551
- tg_ch = data.setdefault("channels", {}).setdefault("telegram", {})
552
- tg_ch["dmPolicy"] = "open"
553
- tg_ch["allowFrom"] = ["*"]
554
- tg_ch["configWrites"] = True
555
- print("[SYNC] Set channels.telegram: dmPolicy=open, allowFrom=[*], configWrites=true")
556
-
557
- # ── Telegram API base auto-probe ──────────────────────────────
558
- # Probe is done in run_openclaw() — sets TELEGRAM_API_ROOT env var
559
- # for the telegram-proxy.cjs preload script to intercept fetch().
560
-
561
- with open(config_path, "w") as f:
562
- json.dump(data, f, indent=2)
563
- print("[SYNC] Config patched and saved.")
564
-
565
- # Fix paired devices scopes (OpenClaw 2026.2.19+ requires operator.write/read)
566
- # Delete old paired devices to force fresh auto-pair with correct scopes
567
- devices_dir = Path(OPENCLAW_DIR) / "devices"
568
- if devices_dir.exists():
569
- import shutil
570
- shutil.rmtree(devices_dir, ignore_errors=True)
571
- print("[SYNC] Deleted devices/ dir to force fresh auto-pair with operator.write/read scopes")
572
-
573
- # Verify write
574
- with open(config_path, "r") as f:
575
- verify_data = json.load(f)
576
- gw = verify_data.get("gateway", {})
577
- providers = list(verify_data.get("models", {}).get("providers", {}).keys())
578
- primary = verify_data.get("agents", {}).get("defaults", {}).get("model", {}).get("primary")
579
- print(f"[SYNC] VERIFY: gateway.port={gw.get('port')}, providers={providers}, primary={primary}")
580
-
581
- except Exception as e:
582
- print(f"[SYNC] Failed to patch config: {e}")
583
- traceback.print_exc()
584
-
585
- def _debug_list_files(self):
586
- try:
587
- count = sum(1 for _, _, files in os.walk(OPENCLAW_HOME) for _ in files)
588
- print(f"[SYNC] Local ~/.openclaw: {count} files")
589
- except Exception as e:
590
- print(f"[SYNC] listing failed: {e}")
591
-
592
- # ── Background sync loop ──────────────────────────────────────────
593
-
594
- def background_sync_loop(self, stop_event):
595
- print(f"[SYNC] Background sync started (interval={SYNC_INTERVAL}s)")
596
- while not stop_event.is_set():
597
- if stop_event.wait(timeout=SYNC_INTERVAL):
598
- break
599
- print(f"[SYNC] ── Periodic sync triggered at {datetime.now().isoformat()} ──")
600
- self.save_to_repo()
601
-
602
- # ── Application runner ─────────────────────────────────────────────
603
-
604
- def run_openclaw(self):
605
- log_file = OPENCLAW_HOME / "workspace" / "startup.log"
606
- log_file.parent.mkdir(parents=True, exist_ok=True)
607
-
608
- # Debug: check if app directory exists
609
- if not Path(APP_DIR).exists():
610
- print(f"[SYNC] ERROR: App directory does not exist: {APP_DIR}")
611
- return None
612
-
613
- # Debug: check entry point (dist/entry.js or openclaw.mjs)
614
- entry_js = Path(APP_DIR) / "dist" / "entry.js"
615
- openclaw_mjs = Path(APP_DIR) / "openclaw.mjs"
616
- if entry_js.exists():
617
- entry_cmd = ["node", "dist/entry.js", "gateway"]
618
- elif openclaw_mjs.exists():
619
- entry_cmd = ["node", "openclaw.mjs", "gateway", "--allow-unconfigured"]
620
- else:
621
- print(f"[SYNC] ERROR: No entry point found in {APP_DIR}")
622
- print(f"[SYNC] Checked: dist/entry.js, openclaw.mjs")
623
- # List what's actually there
624
- try:
625
- print(f"[SYNC] Contents: {list(Path(APP_DIR).iterdir())[:20]}")
626
- except: pass
627
- return None
628
-
629
- # Use subprocess.run with direct output, no shell pipe
630
- print(f"[SYNC] Launching: {' '.join(entry_cmd)}")
631
- print(f"[SYNC] Working directory: {APP_DIR}")
632
- print(f"[SYNC] Log file: {log_file}")
633
-
634
- # Open log file
635
- log_fh = open(log_file, "a")
636
-
637
- # Prepare environment (all API keys passed through for OpenClaw)
638
- env = os.environ.copy()
639
- if OPENAI_API_KEY:
640
- env["OPENAI_API_KEY"] = OPENAI_API_KEY
641
- env["OPENAI_BASE_URL"] = OPENAI_BASE_URL
642
- if OPENROUTER_API_KEY:
643
- env["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
644
- if ZHIPU_API_KEY:
645
- env["ZHIPU_API_KEY"] = ZHIPU_API_KEY
646
- if not OPENAI_API_KEY and not OPENROUTER_API_KEY and not ZHIPU_API_KEY:
647
- print(f"[SYNC] WARNING: No API key set (OPENAI/OPENROUTER/ZHIPU), LLM features may not work")
648
-
649
- # ── Telegram API base probe ──────────────────────────────────────
650
- # Determine working Telegram API endpoint and set env var for
651
- # telegram-proxy.cjs to intercept fetch() calls.
652
- if TELEGRAM_API_BASE:
653
- tg_root = TELEGRAM_API_BASE.rstrip("/")
654
- print(f"[TELEGRAM] Using user-specified API base: {tg_root}")
655
- else:
656
- print("[TELEGRAM] Probing Telegram API endpoints...")
657
- tg_root = probe_telegram_api()
658
-
659
- if tg_root and tg_root != "https://api.telegram.org":
660
- env["TELEGRAM_API_ROOT"] = tg_root
661
- print(f"[TELEGRAM] Set TELEGRAM_API_ROOT={tg_root}")
662
- print(f"[TELEGRAM] telegram-proxy.cjs will redirect fetch() calls")
663
- elif tg_root:
664
- print("[TELEGRAM] Official API reachable — no proxy needed")
665
- else:
666
- print("[TELEGRAM] No reachable endpoint found — Telegram will not work")
667
- try:
668
- # Use Popen without shell to avoid pipe issues
669
- # auth disabled in config — no token needed
670
- process = subprocess.Popen(
671
- entry_cmd,
672
- cwd=str(APP_DIR),
673
- stdout=subprocess.PIPE, # Capture so we can log it
674
- stderr=subprocess.STDOUT,
675
- text=True,
676
- bufsize=1, # Line buffered
677
- env=env # Pass environment with OPENROUTER_API_KEY
678
- )
679
-
680
- # Create a thread to copy output to log file; only print key lines to console
681
- def copy_output():
682
- try:
683
- for line in process.stdout:
684
- log_fh.write(line)
685
- log_fh.flush()
686
- # Only forward important lines to console (errors, warnings, startup)
687
- # Skip noisy download/progress lines that flood the HF Spaces log viewer
688
- stripped = line.strip()
689
- if not stripped:
690
- continue
691
- # Skip progress bars and download noise
692
- if any(skip in stripped for skip in [
693
- 'Downloading', 'Fetching', '%|', '━', '───',
694
- 'Already cached', 'Using cache', 'tokenizer',
695
- '.safetensors', 'model-', 'shard',
696
- ]):
697
- continue
698
- print(line, end='')
699
- except Exception as e:
700
- print(f"[SYNC] Output copy error: {e}")
701
- finally:
702
- log_fh.close()
703
-
704
- thread = threading.Thread(target=copy_output, daemon=True)
705
- thread.start()
706
-
707
- print(f"[SYNC] Process started with PID: {process.pid}")
708
- return process
709
-
710
- except Exception as e:
711
- log_fh.close()
712
- print(f"[SYNC] ERROR: Failed to start process: {e}")
713
- traceback.print_exc()
714
- return None
715
-
716
- # ── Main ────────────────────────────────────────────────────────────────────
717
-
718
- def main():
719
- try:
720
- t_main_start = time.time()
721
-
722
- t0 = time.time()
723
- sync = OpenClawFullSync()
724
- print(f"[TIMER] sync_hf init: {time.time() - t0:.1f}s")
725
-
726
- # 1. Restore
727
- t0 = time.time()
728
- sync.load_from_repo()
729
- print(f"[TIMER] load_from_repo (restore): {time.time() - t0:.1f}s")
730
-
731
- # 2. Background sync
732
- stop_event = threading.Event()
733
- t = threading.Thread(target=sync.background_sync_loop, args=(stop_event,), daemon=True)
734
- t.start()
735
-
736
- # 3. Start application
737
- t0 = time.time()
738
- process = sync.run_openclaw()
739
- print(f"[TIMER] run_openclaw launch: {time.time() - t0:.1f}s")
740
- print(f"[TIMER] Total startup (init → app launched): {time.time() - t_main_start:.1f}s")
741
-
742
- # Signal handler
743
- def handle_signal(sig, frame):
744
- print(f"\n[SYNC] Signal {sig} received. Shutting down...")
745
- stop_event.set()
746
- # Wait for background sync to finish if it's running
747
- t.join(timeout=10)
748
- if process:
749
- process.terminate()
750
- try:
751
- process.wait(timeout=5)
752
- except subprocess.TimeoutExpired:
753
- process.kill()
754
- print("[SYNC] Final sync...")
755
- sync.save_to_repo()
756
- sys.exit(0)
757
-
758
- signal.signal(signal.SIGINT, handle_signal)
759
- signal.signal(signal.SIGTERM, handle_signal)
760
-
761
- # Wait
762
- if process is None:
763
- print("[SYNC] ERROR: Failed to start OpenClaw process. Exiting.")
764
- stop_event.set()
765
- t.join(timeout=5)
766
- sys.exit(1)
767
-
768
- exit_code = process.wait()
769
- print(f"[SYNC] OpenClaw exited with code {exit_code}")
770
- stop_event.set()
771
- t.join(timeout=10)
772
- print("[SYNC] Final sync...")
773
- sync.save_to_repo()
774
- sys.exit(exit_code)
775
-
776
  except Exception as e:
777
- print(f"[SYNC] FATAL ERROR in main: {e}")
778
- traceback.print_exc()
779
  sys.exit(1)
780
 
781
-
782
- if __name__ == "__main__":
783
- main()
 
1
  #!/usr/bin/env python3
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import sys
 
 
 
 
3
  import json
4
+ import os
5
+ import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
+ # Wait for OpenClaw to initialize
8
+ print("Waiting for OpenClaw to initialize...")
9
+ time.sleep(5)
10
+
11
+ # Load OpenClaw configuration
12
+ try:
13
+ with open('/app/.openclaw/openclaw.json', 'r') as f:
14
+ config = json.load(f)
15
+ print("OpenClaw configuration loaded successfully")
16
+ except Exception as e:
17
+ print(f"Error loading OpenClaw config: {e}")
18
+ sys.exit(1)
19
+
20
+ # Check if A2A gateway is available
21
+ try:
22
+ import a2ag
23
+ a2a_available = True
24
+ print("A2A Gateway extension is available")
25
+ except ImportError:
26
+ a2a_available = False
27
+ print("A2A Gateway extension not available")
28
+
29
+ # If A2A is available, initialize it with Cain's configuration
30
+ if a2a_available:
31
  try:
32
+ # Extract configuration for A2A
33
+ a2a_config = {
34
+ "telegram": {
35
+ "token": config.get("telegram_token", ""),
36
+ "allowed_user_ids": config.get("telegram_allowed_ids", [])
37
+ },
38
+ "llm": {
39
+ "provider": config.get("llm_provider", "openai"),
40
+ "model": config.get("llm_model", "gpt-3.5-turbo"),
41
+ "api_key": config.get("llm_api_key", ""),
42
+ "temperature": config.get("llm_temperature", 0.7)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  }
44
+ }
45
+
46
+ # Initialize A2A gateway
47
+ a2a = a2ag.A2A(a2a_config)
48
+ print("A2A Gateway initialized successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  except Exception as e:
50
+ print(f"Error initializing A2A Gateway: {e}")
 
51
  sys.exit(1)
52
 
53
+ print("Sync script completed successfully")
54
+ sys.exit(0)