File size: 30,630 Bytes
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e01d625
 
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553c78c
 
 
 
 
e7ab5f1
 
0d8eee8
 
201eb72
553c78c
 
a477e02
553c78c
25ff55e
 
 
 
 
d61c242
11d18c7
e7ab5f1
b0f06a1
4da041a
b0f06a1
a902a81
b0f06a1
 
a902a81
 
b0f06a1
 
092c6d8
 
 
8f44b56
a902a81
8f44b56
092c6d8
 
 
 
e7ab5f1
 
 
 
 
 
e01d625
95579c1
 
 
 
 
 
 
 
 
 
 
 
e01d625
 
 
 
 
95579c1
 
e01d625
 
 
 
95579c1
 
 
 
 
 
 
e01d625
 
95579c1
e01d625
 
 
95579c1
 
 
 
 
 
e01d625
 
95579c1
e01d625
 
95579c1
e01d625
 
 
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
ad7936d
092c6d8
e7ab5f1
 
 
 
 
 
 
 
 
ad7936d
e7ab5f1
 
 
 
 
553c78c
 
ad7936d
 
553c78c
ad7936d
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bce2d13
e7ab5f1
 
 
 
 
bce2d13
e7ab5f1
 
 
 
 
 
 
 
 
 
 
e516099
fcd4893
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63ca3f6
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6c8a11
 
 
 
0d8eee8
483945f
0d8eee8
553c78c
 
 
 
 
 
 
c03793c
e6c8a11
 
c03793c
e6c8a11
 
 
 
 
 
 
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25ff55e
 
 
 
 
 
 
 
e7ab5f1
 
 
 
0d8eee8
e7ab5f1
 
 
f9ba624
25ff55e
e7ab5f1
 
0d8eee8
e7ab5f1
 
 
 
 
553c78c
e7ab5f1
553c78c
 
 
 
 
 
 
 
e7ab5f1
 
 
 
 
 
3606260
 
e7ab5f1
 
553c78c
 
 
e7ab5f1
25ff55e
e7ab5f1
04264a9
e7ab5f1
04264a9
e7ab5f1
 
 
 
 
3371c9c
 
 
0e39bfb
 
3371c9c
0e39bfb
3371c9c
e01d625
95579c1
 
e01d625
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
553c78c
e7ab5f1
553c78c
 
 
e7ab5f1
 
553c78c
 
95579c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7ab5f1
 
b4b523f
e7ab5f1
b4b523f
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c03793c
 
 
e7ab5f1
c03793c
e7ab5f1
 
c03793c
e7ab5f1
c03793c
e7ab5f1
 
 
 
 
 
 
c03793c
e7ab5f1
c03793c
 
e7ab5f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
#!/usr/bin/env python3
"""
OpenClaw HF Spaces Persistence β€” Full Directory Sync
=====================================================

Simplified persistence: upload/download the entire ~/.openclaw directory
as-is to/from a Hugging Face Dataset repo.

- Startup:  snapshot_download  β†’  ~/.openclaw
- Periodic: upload_folder      β†’  dataset openclaw_data/
- Shutdown: final upload_folder β†’  dataset openclaw_data/
"""

import json
import os
import re
import shutil
import signal
import ssl
import subprocess
import sys
import tempfile
import threading
import time
import traceback
import urllib.error
import urllib.request
from datetime import datetime
from pathlib import Path
# Set timeout BEFORE importing huggingface_hub
os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "300")
os.environ.setdefault("HF_HUB_UPLOAD_TIMEOUT", "600")

from huggingface_hub import HfApi, snapshot_download

# ── Logging helper ──────────────────────────────────────────────────────────

class TeeLogger:
    """Mirror every write to both an underlying stream and a log file."""

    def __init__(self, filename, stream):
        self.stream = stream
        # Append mode so restarts keep the previous log history.
        self.file = open(filename, "a", encoding="utf-8")

    def write(self, message):
        for sink in (self.stream, self.file):
            sink.write(message)
        # Flush eagerly so the on-disk log never lags the console.
        self.flush()

    def flush(self):
        for sink in (self.stream, self.file):
            sink.flush()

    def fileno(self):
        # Delegate to the real stream so callers needing an OS-level
        # file descriptor (e.g. subprocess redirection) still work.
        return self.stream.fileno()

# ── Configuration ───────────────────────────────────────────────────────────

HF_TOKEN   = os.environ.get("HF_TOKEN")
OPENCLAW_HOME = Path.home() / ".openclaw"
APP_DIR       = Path("/app/openclaw")

# Use ".openclaw" - directly read/write the .openclaw folder in dataset
DATASET_PATH = ".openclaw"

# OpenAI-compatible API (OpenAI, OpenRouter, or any compatible endpoint)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1").rstrip("/")

# OpenRouter API key (optional; alternative to OPENAI_API_KEY + OPENAI_BASE_URL)
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Gateway token (default: huggingclaw; override via GATEWAY_TOKEN env var)
GATEWAY_TOKEN = os.environ.get("GATEWAY_TOKEN", "huggingclaw")

# Default model for new conversations (infer from provider if not set)
OPENCLAW_DEFAULT_MODEL = os.environ.get("OPENCLAW_DEFAULT_MODEL") or (
    "openai/gpt-5-nano" if OPENAI_API_KEY else "openrouter/openai/gpt-oss-20b:free"
)

# HF Spaces built-in env vars (auto-set by HF runtime)
SPACE_HOST = os.environ.get("SPACE_HOST", "")   # e.g. "tao-shen-huggingclaw.hf.space"
SPACE_ID   = os.environ.get("SPACE_ID", "")      # e.g. "tao-shen/HuggingClaw"

SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))  # seconds between periodic uploads
AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")

# Dataset repo: always auto-derive from SPACE_ID when not explicitly set.
# Format: {username}/{SpaceName}-data  (e.g. "your-name/YourSpace-data")
# This ensures each duplicated Space gets its own dataset automatically.
HF_REPO_ID = os.environ.get("OPENCLAW_DATASET_REPO", "")
if not HF_REPO_ID and SPACE_ID:
    # SPACE_ID = "username/SpaceName" β†’ derive "username/SpaceName-data"
    HF_REPO_ID = f"{SPACE_ID}-data"
    print(f"[SYNC] OPENCLAW_DATASET_REPO not set β€” auto-derived from SPACE_ID: {HF_REPO_ID}")
elif not HF_REPO_ID and HF_TOKEN:
    # Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
    # NOTE: whoami() is a network call at import time; on failure persistence
    # is simply disabled (HF_REPO_ID stays empty) rather than crashing.
    try:
        _api = HfApi(token=HF_TOKEN)
        _username = _api.whoami()["name"]
        HF_REPO_ID = f"{_username}/HuggingClaw-data"
        print(f"[SYNC] OPENCLAW_DATASET_REPO not set β€” auto-derived from HF_TOKEN: {HF_REPO_ID}")
        del _api, _username
    except Exception as e:
        print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
        HF_REPO_ID = ""

# Setup logging: tee stdout to sync.log; aliasing stderr to stdout means
# tracebacks also land in the same log file.
log_dir = OPENCLAW_HOME / "workspace"
log_dir.mkdir(parents=True, exist_ok=True)
sys.stdout = TeeLogger(log_dir / "sync.log", sys.stdout)
sys.stderr = sys.stdout

# ── Telegram API Base Auto-Probe ────────────────────────────────────────────
#
# HF Spaces blocks DNS for api.telegram.org.  grammY uses Node 22's built-in
# fetch (undici) which bypasses dns.lookup patching and /etc/hosts.
#
# Solution: probe multiple Telegram API endpoints at startup.  If the official
# endpoint is unreachable, pick the first working mirror.  Then:
#   1. Set TELEGRAM_API_ROOT env var for the Node process
#   2. telegram-proxy.cjs (loaded via NODE_OPTIONS --require) intercepts
#      globalThis.fetch() and rewrites api.telegram.org URLs to the mirror.
#
# This works without a bot token β€” we just test HTTP reachability.
# If a bot token IS available, we do a full getMe verification.

# User can force a specific base via env var (skip auto-probe)
TELEGRAM_API_BASE = os.environ.get("TELEGRAM_API_BASE", "")

TELEGRAM_API_BASES = [
    "https://api.telegram.org",                            # official
    "https://telegram-api.mykdigi.com",                    # known mirror
    "https://telegram-api-proxy-anonymous.pages.dev/api",  # Cloudflare Pages proxy
]


def probe_telegram_api(timeout: int = 8) -> str:
    """Probe Telegram API endpoints and return the first reachable one.

    First checks if official api.telegram.org is reachable (HTTP level).
    If not, tries mirrors.  No bot token required β€” just tests connectivity.

    Args:
        timeout: per-endpoint connect/read timeout in seconds.

    Returns:
        The working base URL (without trailing slash), or "" if all fail.
    """
    ctx = ssl.create_default_context()
    for base in TELEGRAM_API_BASES:
        root = base.rstrip("/")
        url = root + "/"
        try:
            req = urllib.request.Request(url, method="GET")
            # Context manager closes the response promptly; the original
            # left the socket open until garbage collection.
            with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp:
                print(f"[TELEGRAM] βœ“ Reachable: {base} (HTTP {resp.status})")
            return root
        except urllib.error.HTTPError as e:
            # HTTP error (4xx/5xx) still means the host IS reachable
            print(f"[TELEGRAM] βœ“ Reachable: {base} (HTTP {e.code})")
            return root
        except Exception as e:
            reason = str(e)[:80]
            print(f"[TELEGRAM] βœ— Unreachable: {base} ({reason})")
            continue

    print("[TELEGRAM] WARNING: All API endpoints unreachable!")
    return ""


# ── Sync Manager ────────────────────────────────────────────────────────────

class OpenClawFullSync:
    """Upload/download the entire ~/.openclaw directory to HF Dataset.

    Lifecycle: __init__ validates credentials/repo, load_from_repo() restores
    on startup, save_to_repo() runs periodically and at shutdown, and
    run_openclaw() launches the Node gateway process.
    """

    def __init__(self) -> None:
        # Stays disabled (no-op sync) unless both HF_TOKEN and HF_REPO_ID
        # are available; the app itself still runs either way.
        self.enabled = False
        self.dataset_exists = False
        self.api = None

        if not HF_TOKEN:
            print("[SYNC] WARNING: HF_TOKEN not set. Persistence disabled.")
            return
        if not HF_REPO_ID:
            print("[SYNC] WARNING: Could not determine dataset repo (no SPACE_ID or OPENCLAW_DATASET_REPO).")
            print("[SYNC] Persistence disabled.")
            return

        self.enabled = True
        self.api = HfApi(token=HF_TOKEN)
        self.dataset_exists = self._ensure_repo_exists()

    # ── Repo management ────────────────────────────────────────────────

    def _ensure_repo_exists(self) -> bool:
        """Check if dataset repo exists; auto-create only when AUTO_CREATE_DATASET=true AND HF_TOKEN is set.

        Returns True when the repo exists (or was just created), False otherwise.
        """
        try:
            self.api.repo_info(repo_id=HF_REPO_ID, repo_type="dataset")
            print(f"[SYNC] Dataset repo found: {HF_REPO_ID}")
            return True
        except Exception:
            # repo_info raises on "not found" as well as on network errors;
            # both are treated as "repo unavailable" here.
            if not AUTO_CREATE_DATASET:
                print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID}")
                print(f"[SYNC]   Set AUTO_CREATE_DATASET=true to auto-create.")
                print(f"[SYNC] Persistence disabled (app will still run normally).")
                return False
            print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID} β€” creating...")
            try:
                self.api.create_repo(
                    repo_id=HF_REPO_ID,
                    repo_type="dataset",
                    private=True,  # user data — keep the dataset private
                )
                print(f"[SYNC] βœ“ Dataset repo created: {HF_REPO_ID}")
                return True
            except Exception as e:
                print(f"[SYNC] βœ— Failed to create dataset repo: {e}")
                return False

    # ── Restore (startup) ─────────────────────────────────────────────

    def load_from_repo(self) -> None:
        """Download from dataset β†’ ~/.openclaw

        Always ends by patching the config, even when the restore is
        skipped or fails, so the gateway settings are guaranteed.
        """
        if not self.enabled:
            print("[SYNC] Persistence disabled - skipping restore")
            self._ensure_default_config()
            self._patch_config()
            return

        if not self.dataset_exists:
            print(f"[SYNC] Dataset {HF_REPO_ID} does not exist - starting fresh")
            self._ensure_default_config()
            self._patch_config()
            return

        print(f"[SYNC] β–Ά Restoring ~/.openclaw from dataset {HF_REPO_ID} ...")
        OPENCLAW_HOME.mkdir(parents=True, exist_ok=True)

        try:
            files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
            openclaw_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
            if not openclaw_files:
                print(f"[SYNC] No {DATASET_PATH}/ folder in dataset. Starting fresh.")
                self._ensure_default_config()
                self._patch_config()
                return

            print(f"[SYNC] Found {len(openclaw_files)} files under {DATASET_PATH}/ in dataset")

            # Download into a temp dir first, then copy file-by-file into
            # ~/.openclaw so existing local files not in the dataset survive.
            with tempfile.TemporaryDirectory() as tmpdir:
                snapshot_download(
                    repo_id=HF_REPO_ID,
                    repo_type="dataset",
                    allow_patterns=f"{DATASET_PATH}/**",
                    local_dir=tmpdir,
                    token=HF_TOKEN,
                )
                downloaded_root = Path(tmpdir) / DATASET_PATH
                if downloaded_root.exists():
                    for item in downloaded_root.rglob("*"):
                        if item.is_file():
                            rel = item.relative_to(downloaded_root)
                            dest = OPENCLAW_HOME / rel
                            dest.parent.mkdir(parents=True, exist_ok=True)
                            shutil.copy2(str(item), str(dest))
                    print("[SYNC] βœ“ Restore completed.")
                else:
                    print("[SYNC] Downloaded snapshot but dir not found. Starting fresh.")

        except Exception as e:
            # Best-effort restore: log and continue with whatever is local.
            print(f"[SYNC] βœ— Restore failed: {e}")
            traceback.print_exc()

        # Patch config after restore
        self._patch_config()
        self._debug_list_files()

    # ── Save (periodic + shutdown) ─────────────────────────────────────

    def save_to_repo(self) -> None:
        """Upload entire ~/.openclaw directory β†’ dataset (all files, no filtering)"""
        if not self.enabled:
            return
        if not OPENCLAW_HOME.exists():
            print("[SYNC] ~/.openclaw does not exist, nothing to save.")
            return

        # Ensure dataset exists (auto-create if needed)
        if not self._ensure_repo_exists():
            print(f"[SYNC] Dataset {HF_REPO_ID} unavailable - skipping save")
            return

        print(f"[SYNC] β–Ά Uploading ~/.openclaw β†’ dataset {HF_REPO_ID}/{DATASET_PATH}/ ...")

        try:
            # Log what will be uploaded
            # NOTE(review): this prints one line per file every sync cycle;
            # for large trees this makes sync.log grow quickly.
            total_size = 0
            file_count = 0
            for root, dirs, fls in os.walk(OPENCLAW_HOME):
                for fn in fls:
                    fp = os.path.join(root, fn)
                    sz = os.path.getsize(fp)
                    total_size += sz
                    file_count += 1
                    rel = os.path.relpath(fp, OPENCLAW_HOME)
                    print(f"[SYNC]   uploading: {rel} ({sz} bytes)")
            print(f"[SYNC] Uploading: {file_count} files, {total_size} bytes total")

            if file_count == 0:
                print("[SYNC] Nothing to upload.")
                return

            # Upload directory, excluding large log files that trigger LFS rejection
            self.api.upload_folder(
                folder_path=str(OPENCLAW_HOME),
                path_in_repo=DATASET_PATH,
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message=f"Sync .openclaw β€” {datetime.now().isoformat()}",
                ignore_patterns=[
                    "*.log",        # Log files (sync.log, startup.log) β€” regenerated on boot
                    "*.lock",       # Lock files β€” stale after restart
                    "*.tmp",        # Temp files
                    "*.pid",        # PID files
                    "__pycache__",  # Python cache
                ],
            )
            print(f"[SYNC] βœ“ Upload completed at {datetime.now().isoformat()}")

            # Verify
            try:
                files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
                oc_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
                print(f"[SYNC] Dataset now has {len(oc_files)} files under {DATASET_PATH}/")
                for f in oc_files[:30]:
                    print(f"[SYNC]   {f}")
                if len(oc_files) > 30:
                    print(f"[SYNC]   ... and {len(oc_files) - 30} more")
            except Exception:
                # Verification is informational only — ignore failures.
                pass

        except Exception as e:
            print(f"[SYNC] βœ— Upload failed: {e}")
            traceback.print_exc()

    # ── Config helpers ─────────────────────────────────────────────────

    def _ensure_default_config(self) -> None:
        # Create ~/.openclaw/openclaw.json if missing: prefer the bundled
        # template (patched with env-provided keys), else a minimal config.
        config_path = OPENCLAW_HOME / "openclaw.json"
        if config_path.exists():
            return
        default_src = Path(__file__).parent / "openclaw.json.default"
        if default_src.exists():
            shutil.copy2(str(default_src), str(config_path))
            # Replace placeholder or remove provider if no API key
            try:
                with open(config_path, "r") as f:
                    cfg = json.load(f)
                # Set gateway token
                if "gateway" in cfg:
                    cfg["gateway"]["auth"] = {"token": GATEWAY_TOKEN}
                if OPENAI_API_KEY and "models" in cfg and "providers" in cfg["models"] and "openai" in cfg["models"]["providers"]:
                    cfg["models"]["providers"]["openai"]["apiKey"] = OPENAI_API_KEY
                    if OPENAI_BASE_URL:
                        cfg["models"]["providers"]["openai"]["baseUrl"] = OPENAI_BASE_URL
                elif "models" in cfg and "providers" in cfg["models"]:
                    if not OPENAI_API_KEY:
                        cfg["models"]["providers"].pop("openai", None)
                if OPENROUTER_API_KEY:
                    if "models" in cfg and "providers" in cfg["models"] and "openrouter" in cfg["models"]["providers"]:
                        cfg["models"]["providers"]["openrouter"]["apiKey"] = OPENROUTER_API_KEY
                else:
                    if "models" in cfg and "providers" in cfg["models"]:
                        cfg["models"]["providers"].pop("openrouter", None)
                    print("[SYNC] No OPENROUTER_API_KEY β€” removed openrouter provider from config")
                with open(config_path, "w") as f:
                    json.dump(cfg, f, indent=2)
            except Exception as e:
                # Template copy already succeeded; a failed patch still leaves
                # a usable (if unpatched) config on disk.
                print(f"[SYNC] Warning: failed to patch default config: {e}")
            print("[SYNC] Created openclaw.json from default template")
        else:
            with open(config_path, "w") as f:
                json.dump({
                    "gateway": {
                        "mode": "local", "bind": "lan", "port": 7860,
                        "trustedProxies": ["0.0.0.0/0"],
                        "controlUi": {
                            "allowInsecureAuth": True,
                            "allowedOrigins": [
                                "https://huggingface.co"
                            ]
                        }
                    },
                    "session": {"scope": "global"},
                    "models": {"mode": "merge", "providers": {}},
                    "agents": {"defaults": {"workspace": "~/.openclaw/workspace"}}
                }, f)
            print("[SYNC] Created minimal openclaw.json")

    def _patch_config(self) -> None:
        """Ensure critical settings after restore.

        Overwrites the gateway section unconditionally and re-injects
        provider API keys from the environment; other user settings in the
        restored config are preserved.
        """
        config_path = OPENCLAW_HOME / "openclaw.json"
        if not config_path.exists():
            self._ensure_default_config()
            return

        print("[SYNC] Patching configuration...")
        try:
            with open(config_path, "r") as f:
                data = json.load(f)
            print("[SYNC] Config parsed OK.")
        # NOTE(review): Exception already covers JSONDecodeError β€” the tuple
        # is redundant (kept as-is; behavior unchanged).
        except (json.JSONDecodeError, Exception) as e:
            # Config is corrupt β€” back up and start fresh
            print(f"[SYNC] Config JSON is corrupt: {e}")
            backup = config_path.with_suffix(f".corrupt_{int(time.time())}")
            try:
                shutil.copy2(config_path, backup)
                print(f"[SYNC] Backed up corrupt config to {backup.name}")
            except Exception:
                pass
            data = {}
            print("[SYNC] Starting from clean config.")

        try:
            # Remove /dev/null from plugins.locations
            if "plugins" in data and isinstance(data.get("plugins"), dict):
                locs = data["plugins"].get("locations", [])
                if isinstance(locs, list) and "/dev/null" in locs:
                    data["plugins"]["locations"] = [l for l in locs if l != "/dev/null"]

            # Force full gateway config for HF Spaces
            # Dynamic allowedOrigins from SPACE_HOST (auto-set by HF runtime)
            allowed_origins = [
                "https://huggingface.co",
                "https://*.hf.space",
            ]
            if SPACE_HOST:
                allowed_origins.append(f"https://{SPACE_HOST}")
                print(f"[SYNC] SPACE_HOST detected: {SPACE_HOST}")
            data["gateway"] = {
                "mode": "local",
                "bind": "lan",
                "port": 7860,
                "auth": {"token": GATEWAY_TOKEN},
                "trustedProxies": ["0.0.0.0/0"],
                "controlUi": {
                    "allowInsecureAuth": True,
                    "dangerouslyDisableDeviceAuth": True,
                    "allowedOrigins": allowed_origins
                }
            }
            print(f"[SYNC] Set gateway config (auth=token, origins={len(allowed_origins)})")

            # Ensure agents defaults
            data.setdefault("agents", {}).setdefault("defaults", {}).setdefault("model", {})
            data.setdefault("session", {})["scope"] = "global"

            # OpenAI-compatible provider (OPENAI_API_KEY + optional OPENAI_BASE_URL)
            data.setdefault("models", {}).setdefault("providers", {})
            if OPENAI_API_KEY:
                data["models"]["providers"]["openai"] = {
                    "baseUrl": OPENAI_BASE_URL,
                    "apiKey": OPENAI_API_KEY,
                    "api": "openai-completions",
                }
                print(f"[SYNC] Set OpenAI-compatible provider (baseUrl={OPENAI_BASE_URL})")
            # OpenRouter provider (optional)
            if OPENROUTER_API_KEY:
                data["models"]["providers"]["openrouter"] = {
                    "baseUrl": "https://openrouter.ai/api/v1",
                    "apiKey": OPENROUTER_API_KEY,
                    "api": "openai-completions",
                    "models": [
                        {"id": "openai/gpt-oss-20b:free", "name": "GPT-OSS-20B (Free)"},
                        {"id": "deepseek/deepseek-chat:free", "name": "DeepSeek V3 (Free)"}
                    ]
                }
                print("[SYNC] Set OpenRouter provider")
            if not OPENAI_API_KEY and not OPENROUTER_API_KEY:
                print("[SYNC] WARNING: No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features may not work")
            data["models"]["providers"].pop("gemini", None)
            data["agents"]["defaults"]["model"]["primary"] = OPENCLAW_DEFAULT_MODEL

            # Plugin whitelist (only load telegram + whatsapp to speed up startup)
            data.setdefault("plugins", {}).setdefault("entries", {})
            data["plugins"]["allow"] = ["telegram", "whatsapp"]
            if "telegram" not in data["plugins"]["entries"]:
                data["plugins"]["entries"]["telegram"] = {"enabled": True}
            elif isinstance(data["plugins"]["entries"]["telegram"], dict):
                data["plugins"]["entries"]["telegram"]["enabled"] = True

            # ── Telegram channel defaults (open DM policy for HF Spaces) ──
            # Personal bot on HF Spaces β€” no need for strict pairing.
            tg_ch = data.setdefault("channels", {}).setdefault("telegram", {})
            tg_ch["dmPolicy"] = "open"
            tg_ch["allowFrom"] = ["*"]
            tg_ch["configWrites"] = True
            print("[SYNC] Set channels.telegram: dmPolicy=open, allowFrom=[*], configWrites=true")

            # ── Telegram API base auto-probe ──────────────────────────────
            # Probe is done in run_openclaw() β€” sets TELEGRAM_API_ROOT env var
            # for the telegram-proxy.cjs preload script to intercept fetch().

            with open(config_path, "w") as f:
                json.dump(data, f, indent=2)
            print("[SYNC] Config patched and saved.")

            # Verify write
            with open(config_path, "r") as f:
                verify_data = json.load(f)
                gw = verify_data.get("gateway", {})
                providers = list(verify_data.get("models", {}).get("providers", {}).keys())
                primary = verify_data.get("agents", {}).get("defaults", {}).get("model", {}).get("primary")
                print(f"[SYNC] VERIFY: gateway.port={gw.get('port')}, providers={providers}, primary={primary}")

        except Exception as e:
            print(f"[SYNC] Failed to patch config: {e}")
            traceback.print_exc()

    def _debug_list_files(self) -> None:
        # Print up to 50 files of the local ~/.openclaw tree (diagnostics only).
        print(f"[SYNC] Local ~/.openclaw tree:")
        try:
            count = 0
            for root, dirs, files in os.walk(OPENCLAW_HOME):
                # Prune noisy directories in place so os.walk skips them.
                dirs[:] = [d for d in dirs if d not in {".cache", "node_modules", "__pycache__"}]
                for name in sorted(files):
                    rel = os.path.relpath(os.path.join(root, name), OPENCLAW_HOME)
                    print(f"[SYNC]   {rel}")
                    count += 1
                    if count > 50:
                        print("[SYNC]   ... (truncated)")
                        return
        except Exception as e:
            print(f"[SYNC] listing failed: {e}")

    # ── Background sync loop ──────────────────────────────────────────

    def background_sync_loop(self, stop_event) -> None:
        # Periodic upload loop; stop_event.wait doubles as the sleep so a
        # shutdown signal interrupts the interval immediately.
        print(f"[SYNC] Background sync started (interval={SYNC_INTERVAL}s)")
        while not stop_event.is_set():
            if stop_event.wait(timeout=SYNC_INTERVAL):
                break
            print(f"[SYNC] ── Periodic sync triggered at {datetime.now().isoformat()} ──")
            self.save_to_repo()

    # ── Application runner ─────────────────────────────────────────────

    def run_openclaw(self):
        """Launch the OpenClaw Node gateway as a subprocess.

        Returns the Popen handle, or None when the app files are missing
        or the launch fails.
        """
        log_file = OPENCLAW_HOME / "workspace" / "startup.log"
        log_file.parent.mkdir(parents=True, exist_ok=True)

        # Debug: check if app directory exists
        if not Path(APP_DIR).exists():
            print(f"[SYNC] ERROR: App directory does not exist: {APP_DIR}")
            return None

        # Debug: check if dist/entry.js exists
        entry_js = Path(APP_DIR) / "dist" / "entry.js"
        if not entry_js.exists():
            print(f"[SYNC] ERROR: dist/entry.js not found in {APP_DIR}")
            return None

        # Use subprocess.run with direct output, no shell pipe
        print(f"[SYNC] Launching: node dist/entry.js gateway")
        print(f"[SYNC] Working directory: {APP_DIR}")
        print(f"[SYNC] Entry point exists: {entry_js}")
        print(f"[SYNC] Log file: {log_file}")

        # Open log file (closed by the copy_output thread when the child's
        # stdout ends, or in the except branch below on launch failure).
        log_fh = open(log_file, "a")

        # Prepare environment (all API keys passed through for OpenClaw)
        env = os.environ.copy()
        if OPENAI_API_KEY:
            env["OPENAI_API_KEY"] = OPENAI_API_KEY
            env["OPENAI_BASE_URL"] = OPENAI_BASE_URL
        if OPENROUTER_API_KEY:
            env["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
        if not OPENAI_API_KEY and not OPENROUTER_API_KEY:
            print(f"[SYNC] WARNING: No OPENAI_API_KEY or OPENROUTER_API_KEY set, LLM features may not work")

        # ── Telegram API base probe ──────────────────────────────────────
        # Determine working Telegram API endpoint and set env var for
        # telegram-proxy.cjs to intercept fetch() calls.
        if TELEGRAM_API_BASE:
            tg_root = TELEGRAM_API_BASE.rstrip("/")
            print(f"[TELEGRAM] Using user-specified API base: {tg_root}")
        else:
            print("[TELEGRAM] Probing Telegram API endpoints...")
            tg_root = probe_telegram_api()

        if tg_root and tg_root != "https://api.telegram.org":
            env["TELEGRAM_API_ROOT"] = tg_root
            print(f"[TELEGRAM] Set TELEGRAM_API_ROOT={tg_root}")
            print(f"[TELEGRAM] telegram-proxy.cjs will redirect fetch() calls")
        elif tg_root:
            print("[TELEGRAM] Official API reachable β€” no proxy needed")
        else:
            print("[TELEGRAM] No reachable endpoint found β€” Telegram will not work")
        try:
            # Use Popen without shell to avoid pipe issues
            # auth disabled in config β€” no token needed
            process = subprocess.Popen(
                ["node", "dist/entry.js", "gateway"],
                cwd=str(APP_DIR),
                stdout=subprocess.PIPE,  # Capture so we can log it
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,  # Line buffered
                env=env  # Pass environment with OPENROUTER_API_KEY
            )

            # Create a thread to copy output to both log file and stdout
            def copy_output():
                try:
                    for line in process.stdout:
                        log_fh.write(line)
                        log_fh.flush()
                        print(line, end='')  # Also print to console
                except Exception as e:
                    print(f"[SYNC] Output copy error: {e}")
                finally:
                    log_fh.close()

            thread = threading.Thread(target=copy_output, daemon=True)
            thread.start()

            print(f"[SYNC] Process started with PID: {process.pid}")
            return process

        except Exception as e:
            log_fh.close()
            print(f"[SYNC] ERROR: Failed to start process: {e}")
            traceback.print_exc()
            return None

# ── Main ────────────────────────────────────────────────────────────────────

def main():
    """Entry point: restore state, start background sync, run OpenClaw.

    Exits with the child process's exit code (or 1 on fatal error),
    performing a final sync to the dataset before leaving.
    """
    try:
        t_main_start = time.time()

        t0 = time.time()
        sync = OpenClawFullSync()
        print(f"[TIMER] sync_hf init: {time.time() - t0:.1f}s")

        # 1. Restore
        t0 = time.time()
        sync.load_from_repo()
        print(f"[TIMER] load_from_repo (restore): {time.time() - t0:.1f}s")

        # 2. Background sync (daemon thread so it never blocks interpreter exit)
        stop_event = threading.Event()
        t = threading.Thread(target=sync.background_sync_loop, args=(stop_event,), daemon=True)
        t.start()

        # 3. Start application
        t0 = time.time()
        process = sync.run_openclaw()
        print(f"[TIMER] run_openclaw launch: {time.time() - t0:.1f}s")
        print(f"[TIMER] Total startup (init β†’ app launched): {time.time() - t_main_start:.1f}s")

        # Signal handler: stop sync loop, terminate child, final upload.
        def handle_signal(sig, frame):
            print(f"\n[SYNC] Signal {sig} received. Shutting down...")
            stop_event.set()
            # Wait for background sync to finish if it's running
            t.join(timeout=10)
            if process:
                process.terminate()
                try:
                    process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    # Graceful terminate failed — force-kill the child.
                    process.kill()
            print("[SYNC] Final sync...")
            sync.save_to_repo()
            sys.exit(0)

        signal.signal(signal.SIGINT, handle_signal)
        signal.signal(signal.SIGTERM, handle_signal)

        # Wait
        if process is None:
            print("[SYNC] ERROR: Failed to start OpenClaw process. Exiting.")
            stop_event.set()
            t.join(timeout=5)
            sys.exit(1)

        # Block until the child exits, then do one last upload.
        exit_code = process.wait()
        print(f"[SYNC] OpenClaw exited with code {exit_code}")
        stop_event.set()
        t.join(timeout=10)
        print("[SYNC] Final sync...")
        sync.save_to_repo()
        sys.exit(exit_code)

    except Exception as e:
        print(f"[SYNC] FATAL ERROR in main: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()