File size: 34,982 Bytes
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55bff24
 
b19d31a
 
 
 
 
2f79e09
 
 
 
 
 
 
 
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b594d1
 
 
 
 
b19d31a
 
3227ad9
306fbc6
 
5a096f1
 
af9539c
8ee58f9
 
 
 
0b594d1
 
306fbc6
ae76fba
306fbc6
0b594d1
7cd8723
 
 
 
 
082f8f4
66d172d
b19d31a
8c4d233
750878a
8c4d233
7728a08
8c4d233
 
7728a08
 
8c4d233
 
3ff709d
 
 
8f454de
7728a08
8f454de
3ff709d
 
 
 
b19d31a
 
 
 
 
 
55bff24
d88aa6c
 
 
 
 
 
 
 
 
 
 
 
55bff24
 
 
 
 
d88aa6c
 
55bff24
 
 
 
d88aa6c
 
 
 
 
 
 
55bff24
 
d88aa6c
55bff24
 
 
d88aa6c
 
 
 
 
 
55bff24
 
d88aa6c
55bff24
 
d88aa6c
55bff24
 
 
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
3f16497
3ff709d
b19d31a
 
 
 
 
 
 
 
 
3f16497
b19d31a
 
 
 
 
0b594d1
 
3f16497
 
0b594d1
3f16497
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0edddc
b19d31a
 
 
 
 
a0edddc
b19d31a
 
 
 
 
 
 
 
 
 
 
cf93629
dd40c50
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a2cb8a
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f79e09
b19d31a
 
 
 
 
2f79e09
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2f79e09
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98cdf74
 
 
 
5a096f1
8814392
5a096f1
0b594d1
 
 
 
 
 
 
0d9d280
98cdf74
 
0d9d280
98cdf74
 
 
 
 
 
 
b19d31a
 
 
 
 
2778fda
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b17d83b
 
 
 
 
 
 
 
 
 
b19d31a
7cd8723
 
 
 
 
 
 
 
b19d31a
 
 
2778fda
9da0d8a
b19d31a
 
 
d2e3735
7cd8723
b19d31a
 
2778fda
b19d31a
 
 
 
 
e3478dc
 
 
0b594d1
e3478dc
0b594d1
 
 
 
 
b19d31a
e3478dc
b19d31a
 
 
 
3f7c1c9
 
b19d31a
 
0b594d1
306fbc6
e3478dc
3227ad9
306fbc6
3227ad9
df4b021
ae76fba
 
 
 
df4b021
306fbc6
 
e3478dc
306fbc6
e3478dc
7cd8723
b19d31a
8ee58f9
b19d31a
8ee58f9
 
 
 
b19d31a
 
 
 
 
8ee58f9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9da0d8a
8ee58f9
 
 
 
 
 
27fc3a4
 
 
1be188a
 
27fc3a4
1be188a
27fc3a4
55bff24
d88aa6c
 
55bff24
b19d31a
 
 
 
d865e6e
1e3d376
 
 
 
 
 
d865e6e
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
2f79e09
 
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f18a4b6
b19d31a
f18a4b6
 
 
 
 
 
 
 
 
 
 
 
b19d31a
 
 
f18a4b6
b19d31a
 
 
 
 
 
0b594d1
b19d31a
0b594d1
 
 
b19d31a
 
306fbc6
 
 
 
d88aa6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b19d31a
 
f10fd84
b19d31a
f18a4b6
b19d31a
 
 
 
 
 
 
 
2f79e09
b19d31a
 
 
 
 
2f79e09
 
 
 
 
 
 
 
 
 
 
 
 
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d9d280
 
 
b19d31a
0d9d280
b19d31a
 
0d9d280
b19d31a
0d9d280
b19d31a
 
 
 
 
 
 
0d9d280
b19d31a
0d9d280
 
b19d31a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
#!/usr/bin/env python3
"""
OpenClaw HF Spaces Persistence β€” Full Directory Sync
=====================================================

Simplified persistence: upload/download the entire ~/.openclaw directory
as-is to/from a Hugging Face Dataset repo.

- Startup:  snapshot_download  β†’  ~/.openclaw
- Periodic: upload_folder      β†’  dataset openclaw_data/
- Shutdown: final upload_folder β†’  dataset openclaw_data/
"""

import os
import sys
import time
import threading
import subprocess
import signal
import json
import shutil
import tempfile
import traceback
import re
import urllib.request
import ssl
from pathlib import Path
from datetime import datetime
# Set timeout BEFORE importing huggingface_hub
os.environ.setdefault("HF_HUB_DOWNLOAD_TIMEOUT", "300")
os.environ.setdefault("HF_HUB_UPLOAD_TIMEOUT", "600")
# Suppress huggingface_hub progress bars and verbose download/upload logs
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
os.environ.setdefault("HF_HUB_VERBOSITY", "warning")

import logging as _logging
_logging.getLogger("huggingface_hub").setLevel(_logging.WARNING)
_logging.getLogger("huggingface_hub.utils").setLevel(_logging.WARNING)
_logging.getLogger("filelock").setLevel(_logging.WARNING)

from huggingface_hub import HfApi, snapshot_download

# ── Logging helper ──────────────────────────────────────────────────────────

class TeeLogger:
    """Duplicate output to stream and file."""
    def __init__(self, filename, stream):
        self.stream = stream
        self.file = open(filename, "a", encoding="utf-8")
    def write(self, message):
        self.stream.write(message)
        self.file.write(message)
        self.flush()
    def flush(self):
        self.stream.flush()
        self.file.flush()
    def fileno(self):
        return self.stream.fileno()

# ── Configuration ───────────────────────────────────────────────────────────

HF_TOKEN   = os.environ.get("HF_TOKEN")
OPENCLAW_HOME = Path.home() / ".openclaw"
APP_DIR       = Path("/app/openclaw")

# Use ".openclaw" - directly read/write the .openclaw folder in dataset
DATASET_PATH = ".openclaw"

# OpenAI-compatible API (OpenAI, OpenRouter, or any compatible endpoint)
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1").rstrip("/")

# OpenRouter API key (optional; alternative to OPENAI_API_KEY + OPENAI_BASE_URL)
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Zhipu AI (z.ai) API key (optional; GLM-4 series, Anthropic-compatible endpoint)
ZHIPU_API_KEY = os.environ.get("ZHIPU_API_KEY", "")

# Gateway token (default: huggingclaw; override via GATEWAY_TOKEN env var)
GATEWAY_TOKEN = os.environ.get("GATEWAY_TOKEN", "huggingclaw")

# A2A configuration (optional; only activated when A2A_PEERS is set)
AGENT_NAME = os.environ.get("AGENT_NAME", "HuggingClaw")
A2A_PEERS = os.environ.get("A2A_PEERS", "")  # comma-separated peer URLs

# Default model for new conversations (infer from provider if not set)
OPENCLAW_DEFAULT_MODEL = os.environ.get("OPENCLAW_DEFAULT_MODEL") or (
    "openai/gpt-5-nano" if OPENAI_API_KEY
    else "zhipu/glm-4.5-air" if ZHIPU_API_KEY
    else "openrouter/openai/gpt-oss-20b:free"
)

# HF Spaces built-in env vars (auto-set by HF runtime)
SPACE_HOST = os.environ.get("SPACE_HOST", "")   # e.g. "tao-shen-huggingclaw.hf.space"
SPACE_ID   = os.environ.get("SPACE_ID", "")      # e.g. "tao-shen/HuggingClaw"

SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", "60"))
AUTO_CREATE_DATASET = os.environ.get("AUTO_CREATE_DATASET", "false").lower() in ("true", "1", "yes")

# Dataset repo: always auto-derive from SPACE_ID when not explicitly set.
# Format: {username}/{SpaceName}-data  (e.g. "your-name/YourSpace-data")
# This ensures each duplicated Space gets its own dataset automatically.
HF_REPO_ID = os.environ.get("OPENCLAW_DATASET_REPO", "")
if not HF_REPO_ID and SPACE_ID:
    # SPACE_ID = "username/SpaceName" β†’ derive "username/SpaceName-data"
    HF_REPO_ID = f"{SPACE_ID}-data"
    print(f"[SYNC] OPENCLAW_DATASET_REPO not set β€” auto-derived from SPACE_ID: {HF_REPO_ID}")
elif not HF_REPO_ID and HF_TOKEN:
    # Fallback: no SPACE_ID (local Docker), derive from HF_TOKEN username
    try:
        _api = HfApi(token=HF_TOKEN)
        _username = _api.whoami()["name"]
        HF_REPO_ID = f"{_username}/HuggingClaw-data"
        print(f"[SYNC] OPENCLAW_DATASET_REPO not set β€” auto-derived from HF_TOKEN: {HF_REPO_ID}")
        del _api, _username
    except Exception as e:
        print(f"[SYNC] WARNING: Could not derive username from HF_TOKEN: {e}")
        HF_REPO_ID = ""

# Setup logging
log_dir = OPENCLAW_HOME / "workspace"
log_dir.mkdir(parents=True, exist_ok=True)
sys.stdout = TeeLogger(log_dir / "sync.log", sys.stdout)
sys.stderr = sys.stdout

# ── Telegram API Base Auto-Probe ────────────────────────────────────────────
#
# HF Spaces blocks DNS for api.telegram.org.  grammY uses Node 22's built-in
# fetch (undici) which bypasses dns.lookup patching and /etc/hosts.
#
# Solution: probe multiple Telegram API endpoints at startup.  If the official
# endpoint is unreachable, pick the first working mirror.  Then:
#   1. Set TELEGRAM_API_ROOT env var for the Node process
#   2. telegram-proxy.cjs (loaded via NODE_OPTIONS --require) intercepts
#      globalThis.fetch() and rewrites api.telegram.org URLs to the mirror.
#
# This works without a bot token β€” we just test HTTP reachability.
# If a bot token IS available, we do a full getMe verification.

# User can force a specific base via env var (skip auto-probe)
TELEGRAM_API_BASE = os.environ.get("TELEGRAM_API_BASE", "")

TELEGRAM_API_BASES = [
    "https://api.telegram.org",                            # official
    "https://telegram-api.mykdigi.com",                    # known mirror
    "https://telegram-api-proxy-anonymous.pages.dev/api",  # Cloudflare Pages proxy
]


def probe_telegram_api(timeout: int = 8) -> str:
    """Probe Telegram API endpoints and return the first reachable one.

    First checks if official api.telegram.org is reachable (HTTP level).
    If not, tries mirrors.  No bot token required β€” just tests connectivity.
    Returns the working base URL (without trailing slash), or "" if all fail.
    """
    ctx = ssl.create_default_context()
    for base in TELEGRAM_API_BASES:
        url = base.rstrip("/") + "/"
        try:
            req = urllib.request.Request(url, method="GET")
            resp = urllib.request.urlopen(req, timeout=timeout, context=ctx)
            print(f"[TELEGRAM] βœ“ Reachable: {base} (HTTP {resp.status})")
            return base.rstrip("/")
        except urllib.error.HTTPError as e:
            # HTTP error (4xx/5xx) still means the host IS reachable
            print(f"[TELEGRAM] βœ“ Reachable: {base} (HTTP {e.code})")
            return base.rstrip("/")
        except Exception as e:
            reason = str(e)[:80]
            print(f"[TELEGRAM] βœ— Unreachable: {base} ({reason})")
            continue

    print("[TELEGRAM] WARNING: All API endpoints unreachable!")
    return ""


# ── Sync Manager ────────────────────────────────────────────────────────────

class OpenClawFullSync:
    """Upload/download the entire ~/.openclaw directory to HF Dataset."""

    def __init__(self):
        self.enabled = False
        self.dataset_exists = False
        self.api = None

        if not HF_TOKEN:
            print("[SYNC] WARNING: HF_TOKEN not set. Persistence disabled.")
            return
        if not HF_REPO_ID:
            print("[SYNC] WARNING: Could not determine dataset repo (no SPACE_ID or OPENCLAW_DATASET_REPO).")
            print("[SYNC] Persistence disabled.")
            return

        self.enabled = True
        self.api = HfApi(token=HF_TOKEN)
        self.dataset_exists = self._ensure_repo_exists()

    # ── Repo management ────────────────────────────────────────────────

    def _ensure_repo_exists(self):
        """Check if dataset repo exists; auto-create only when AUTO_CREATE_DATASET=true AND HF_TOKEN is set."""
        try:
            self.api.repo_info(repo_id=HF_REPO_ID, repo_type="dataset")
            print(f"[SYNC] Dataset repo found: {HF_REPO_ID}")
            return True
        except Exception:
            if not AUTO_CREATE_DATASET:
                print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID}")
                print(f"[SYNC]   Set AUTO_CREATE_DATASET=true to auto-create.")
                print(f"[SYNC] Persistence disabled (app will still run normally).")
                return False
            print(f"[SYNC] Dataset repo NOT found: {HF_REPO_ID} β€” creating...")
            try:
                self.api.create_repo(
                    repo_id=HF_REPO_ID,
                    repo_type="dataset",
                    private=True,
                )
                print(f"[SYNC] βœ“ Dataset repo created: {HF_REPO_ID}")
                return True
            except Exception as e:
                print(f"[SYNC] βœ— Failed to create dataset repo: {e}")
                return False

    # ── Restore (startup) ─────────────────────────────────────────────

    def load_from_repo(self):
        """Download from dataset β†’ ~/.openclaw"""
        if not self.enabled:
            print("[SYNC] Persistence disabled - skipping restore")
            self._ensure_default_config()
            self._patch_config()
            return

        if not self.dataset_exists:
            print(f"[SYNC] Dataset {HF_REPO_ID} does not exist - starting fresh")
            self._ensure_default_config()
            self._patch_config()
            return

        print(f"[SYNC] β–Ά Restoring ~/.openclaw from dataset {HF_REPO_ID} ...")
        OPENCLAW_HOME.mkdir(parents=True, exist_ok=True)

        try:
            files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
            openclaw_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
            if not openclaw_files:
                print(f"[SYNC] No {DATASET_PATH}/ folder in dataset. Starting fresh.")
                self._ensure_default_config()
                self._patch_config()
                return

            print(f"[SYNC] Found {len(openclaw_files)} files under {DATASET_PATH}/ in dataset")

            with tempfile.TemporaryDirectory() as tmpdir:
                snapshot_download(
                    repo_id=HF_REPO_ID,
                    repo_type="dataset",
                    allow_patterns=f"{DATASET_PATH}/**",
                    local_dir=tmpdir,
                    token=HF_TOKEN,
                )
                downloaded_root = Path(tmpdir) / DATASET_PATH
                if downloaded_root.exists():
                    for item in downloaded_root.rglob("*"):
                        if item.is_file():
                            rel = item.relative_to(downloaded_root)
                            dest = OPENCLAW_HOME / rel
                            dest.parent.mkdir(parents=True, exist_ok=True)
                            shutil.copy2(str(item), str(dest))
                    print("[SYNC] βœ“ Restore completed.")
                else:
                    print("[SYNC] Downloaded snapshot but dir not found. Starting fresh.")

        except Exception as e:
            print(f"[SYNC] βœ— Restore failed: {e}")
            traceback.print_exc()

        # Patch config after restore
        self._patch_config()
        self._debug_list_files()

    # ── Save (periodic + shutdown) ─────────────────────────────────────

    def save_to_repo(self):
        """Upload entire ~/.openclaw directory β†’ dataset (all files, no filtering)"""
        if not self.enabled:
            return
        if not OPENCLAW_HOME.exists():
            print("[SYNC] ~/.openclaw does not exist, nothing to save.")
            return

        # Ensure dataset exists (auto-create if needed)
        if not self._ensure_repo_exists():
            print(f"[SYNC] Dataset {HF_REPO_ID} unavailable - skipping save")
            return

        print(f"[SYNC] β–Ά Uploading ~/.openclaw β†’ dataset {HF_REPO_ID}/{DATASET_PATH}/ ...")

        try:
            # Count files to upload (no per-file logging to reduce noise)
            total_size = 0
            file_count = 0
            for root, dirs, fls in os.walk(OPENCLAW_HOME):
                for fn in fls:
                    fp = os.path.join(root, fn)
                    total_size += os.path.getsize(fp)
                    file_count += 1
            print(f"[SYNC] Uploading: {file_count} files, {total_size} bytes total")

            if file_count == 0:
                print("[SYNC] Nothing to upload.")
                return

            # Upload directory, excluding large log files that trigger LFS rejection
            self.api.upload_folder(
                folder_path=str(OPENCLAW_HOME),
                path_in_repo=DATASET_PATH,
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message=f"Sync .openclaw β€” {datetime.now().isoformat()}",
                ignore_patterns=[
                    "*.log",        # Log files (sync.log, startup.log) β€” regenerated on boot
                    "*.lock",       # Lock files β€” stale after restart
                    "*.tmp",        # Temp files
                    "*.pid",        # PID files
                    "__pycache__",  # Python cache
                ],
            )
            print(f"[SYNC] βœ“ Upload completed at {datetime.now().isoformat()}")

            # Verify (summary only)
            try:
                files = self.api.list_repo_files(repo_id=HF_REPO_ID, repo_type="dataset")
                oc_files = [f for f in files if f.startswith(f"{DATASET_PATH}/")]
                print(f"[SYNC] Dataset now has {len(oc_files)} files under {DATASET_PATH}/")
            except Exception:
                pass

        except Exception as e:
            print(f"[SYNC] βœ— Upload failed: {e}")
            traceback.print_exc()

    # ── Config helpers ─────────────────────────────────────────────────

    def _ensure_default_config(self):
        config_path = OPENCLAW_HOME / "openclaw.json"
        if config_path.exists():
            return
        default_src = Path(__file__).parent / "openclaw.json.default"
        if default_src.exists():
            shutil.copy2(str(default_src), str(config_path))
            # Replace placeholder or remove provider if no API key
            try:
                with open(config_path, "r") as f:
                    cfg = json.load(f)
                # Set gateway token
                if "gateway" in cfg:
                    cfg["gateway"]["auth"] = {"token": GATEWAY_TOKEN}
                if OPENAI_API_KEY and "models" in cfg and "providers" in cfg["models"] and "openai" in cfg["models"]["providers"]:
                    cfg["models"]["providers"]["openai"]["apiKey"] = OPENAI_API_KEY
                    if OPENAI_BASE_URL:
                        cfg["models"]["providers"]["openai"]["baseUrl"] = OPENAI_BASE_URL
                elif "models" in cfg and "providers" in cfg["models"]:
                    if not OPENAI_API_KEY:
                        cfg["models"]["providers"].pop("openai", None)
                if OPENROUTER_API_KEY:
                    if "models" in cfg and "providers" in cfg["models"] and "openrouter" in cfg["models"]["providers"]:
                        cfg["models"]["providers"]["openrouter"]["apiKey"] = OPENROUTER_API_KEY
                else:
                    if "models" in cfg and "providers" in cfg["models"]:
                        cfg["models"]["providers"].pop("openrouter", None)
                    print("[SYNC] No OPENROUTER_API_KEY β€” removed openrouter provider from config")
                with open(config_path, "w") as f:
                    json.dump(cfg, f, indent=2)
            except Exception as e:
                print(f"[SYNC] Warning: failed to patch default config: {e}")
            print("[SYNC] Created openclaw.json from default template")
        else:
            with open(config_path, "w") as f:
                json.dump({
                    "gateway": {
                        "mode": "local", "bind": "lan", "port": 7860,
                        "trustedProxies": ["0.0.0.0/0"],
                        "controlUi": {
                            "allowInsecureAuth": True,
                            "allowedOrigins": [
                                "https://huggingface.co"
                            ]
                        }
                    },
                    "session": {"scope": "global"},
                    "models": {"mode": "merge", "providers": {}},
                    "agents": {"defaults": {"workspace": "~/.openclaw/workspace"}}
                }, f)
            print("[SYNC] Created minimal openclaw.json")

    def _patch_config(self):
        """Ensure critical settings after restore."""
        config_path = OPENCLAW_HOME / "openclaw.json"
        if not config_path.exists():
            self._ensure_default_config()
            return

        print("[SYNC] Patching configuration...")
        try:
            with open(config_path, "r") as f:
                data = json.load(f)
            print("[SYNC] Config parsed OK.")
        except (json.JSONDecodeError, Exception) as e:
            # Config is corrupt β€” back up and start fresh
            print(f"[SYNC] Config JSON is corrupt: {e}")
            backup = config_path.with_suffix(f".corrupt_{int(time.time())}")
            try:
                shutil.copy2(config_path, backup)
                print(f"[SYNC] Backed up corrupt config to {backup.name}")
            except Exception:
                pass
            data = {}
            print("[SYNC] Starting from clean config.")

        try:
            # Remove /dev/null from plugins.locations
            if "plugins" in data and isinstance(data.get("plugins"), dict):
                locs = data["plugins"].get("locations", [])
                if isinstance(locs, list) and "/dev/null" in locs:
                    data["plugins"]["locations"] = [l for l in locs if l != "/dev/null"]

            # Clean up invalid config keys that crash OpenClaw
            if "auth" in data and isinstance(data.get("auth"), dict):
                data["auth"].pop("defaultScope", None)
                if not data["auth"]:
                    del data["auth"]
            if "gateway" in data and isinstance(data.get("gateway"), dict):
                auth = data["gateway"].get("auth", {})
                if isinstance(auth, dict):
                    auth.pop("scope", None)

            # Force full gateway config for HF Spaces
            # Dynamic allowedOrigins from SPACE_HOST (auto-set by HF runtime)
            allowed_origins = [
                "https://huggingface.co",
                "https://*.hf.space",
            ]
            if SPACE_HOST:
                allowed_origins.append(f"https://{SPACE_HOST}")
                print(f"[SYNC] SPACE_HOST detected: {SPACE_HOST}")
            data["gateway"] = {
                "mode": "local",
                "bind": "lan",
                "port": 7860,
                "auth": {"token": GATEWAY_TOKEN},
                "trustedProxies": ["0.0.0.0/0"],
                "controlUi": {
                    "allowInsecureAuth": True,
                    "dangerouslyDisableDeviceAuth": True,
                    "allowedOrigins": allowed_origins
                }
            }
            print(f"[SYNC] Set gateway config (port=7860, auth=token, origins={len(allowed_origins)})")

            # Ensure agents defaults
            data.setdefault("agents", {}).setdefault("defaults", {}).setdefault("model", {})
            data.setdefault("session", {})["scope"] = "global"

            # Build providers from scratch β€” only include providers with active API keys.
            # This ensures removed secrets don't leave stale providers from backup.
            providers = {}
            if OPENAI_API_KEY:
                providers["openai"] = {
                    "baseUrl": OPENAI_BASE_URL,
                    "apiKey": OPENAI_API_KEY,
                    "api": "openai-completions",
                }
                print(f"[SYNC] Set OpenAI-compatible provider (baseUrl={OPENAI_BASE_URL})")
            if OPENROUTER_API_KEY:
                providers["openrouter"] = {
                    "baseUrl": "https://openrouter.ai/api/v1",
                    "apiKey": OPENROUTER_API_KEY,
                    "api": "openai-completions",
                    "models": [
                        {"id": "openai/gpt-oss-20b:free", "name": "GPT-OSS-20B (Free)"},
                        {"id": "deepseek/deepseek-chat:free", "name": "DeepSeek V3 (Free)"}
                    ]
                }
                print("[SYNC] Set OpenRouter provider")
            if ZHIPU_API_KEY:
                providers["zhipu"] = {
                    "baseUrl": "https://open.bigmodel.cn/api/anthropic",
                    "apiKey": ZHIPU_API_KEY,
                    "api": "anthropic-messages",
                    "models": [
                        {"id": "glm-4.5-air", "name": "GLM-4.5 Air"},
                        {"id": "glm-4.5", "name": "GLM-4.5"},
                        {"id": "glm-4.6", "name": "GLM-4.6"},
                        {"id": "glm-4.7", "name": "GLM-4.7"},
                    ]
                }
                print("[SYNC] Set Zhipu AI provider")
            if not providers:
                print("[SYNC] WARNING: No API key set (OPENAI/OPENROUTER/ZHIPU), LLM features may not work")
            data.setdefault("models", {})["providers"] = providers
            data["agents"]["defaults"]["model"]["primary"] = OPENCLAW_DEFAULT_MODEL

            # Plugin whitelist
            data.setdefault("plugins", {}).setdefault("entries", {})
            plugin_allow = ["telegram", "whatsapp"]
            if A2A_PEERS:
                plugin_allow.append("a2a-gateway")
            data["plugins"]["allow"] = plugin_allow
            if "telegram" not in data["plugins"]["entries"]:
                data["plugins"]["entries"]["telegram"] = {"enabled": True}
            elif isinstance(data["plugins"]["entries"]["telegram"], dict):
                data["plugins"]["entries"]["telegram"]["enabled"] = True

            # ── A2A Gateway Plugin Configuration (only if A2A_PEERS is set) ──
            if A2A_PEERS:
                peers = []
                for peer_url in A2A_PEERS.split(","):
                    peer_url = peer_url.strip()
                    if not peer_url:
                        continue
                    name = peer_url.split("//")[-1].split(".")[0].split("-")[-1].capitalize()
                    peers.append({
                        "name": name,
                        "agentCardUrl": f"{peer_url}/.well-known/agent-card.json"
                    })
                    print(f"[SYNC] A2A peer: {name} β†’ {peer_url}")

                data["plugins"]["entries"]["a2a-gateway"] = {
                    "enabled": True,
                    "config": {
                        "agentCard": {
                            "name": AGENT_NAME,
                            "description": f"{AGENT_NAME} - HuggingClaw A2A Agent",
                            "skills": [{"id": "chat", "name": "chat", "description": "Chat bridge"}]
                        },
                        "server": {"host": "0.0.0.0", "port": 18800},
                        "security": {"inboundAuth": "none"},
                        "routing": {"defaultAgentId": "main"},
                        "peers": peers
                    }
                }
                print(f"[SYNC] A2A gateway configured: name={AGENT_NAME}, port=18800, peers={len(peers)}")

            # ── Telegram channel defaults (open DM policy for HF Spaces) ──
            # Personal bot on HF Spaces β€” no need for strict pairing.
            tg_ch = data.setdefault("channels", {}).setdefault("telegram", {})
            tg_ch["dmPolicy"] = "open"
            tg_ch["allowFrom"] = ["*"]
            tg_ch["configWrites"] = True
            print("[SYNC] Set channels.telegram: dmPolicy=open, allowFrom=[*], configWrites=true")

            # ── Telegram API base auto-probe ──────────────────────────────
            # Probe is done in run_openclaw() β€” sets TELEGRAM_API_ROOT env var
            # for the telegram-proxy.cjs preload script to intercept fetch().

            with open(config_path, "w") as f:
                json.dump(data, f, indent=2)
            print("[SYNC] Config patched and saved.")

            # Fix paired devices scopes (OpenClaw 2026.2.19+ requires operator.write/read)
            # Delete old paired devices to force fresh auto-pair with correct scopes
            devices_dir = Path(OPENCLAW_DIR) / "devices"
            if devices_dir.exists():
                import shutil
                shutil.rmtree(devices_dir, ignore_errors=True)
                print("[SYNC] Deleted devices/ dir to force fresh auto-pair with operator.write/read scopes")

            # Verify write
            with open(config_path, "r") as f:
                verify_data = json.load(f)
                gw = verify_data.get("gateway", {})
                providers = list(verify_data.get("models", {}).get("providers", {}).keys())
                primary = verify_data.get("agents", {}).get("defaults", {}).get("model", {}).get("primary")
                print(f"[SYNC] VERIFY: gateway.port={gw.get('port')}, providers={providers}, primary={primary}")

        except Exception as e:
            print(f"[SYNC] Failed to patch config: {e}")
            traceback.print_exc()

    def _debug_list_files(self):
        try:
            count = sum(1 for _, _, files in os.walk(OPENCLAW_HOME) for _ in files)
            print(f"[SYNC] Local ~/.openclaw: {count} files")
        except Exception as e:
            print(f"[SYNC] listing failed: {e}")

    # ── Background sync loop ──────────────────────────────────────────

    def background_sync_loop(self, stop_event):
        print(f"[SYNC] Background sync started (interval={SYNC_INTERVAL}s)")
        while not stop_event.is_set():
            if stop_event.wait(timeout=SYNC_INTERVAL):
                break
            print(f"[SYNC] ── Periodic sync triggered at {datetime.now().isoformat()} ──")
            self.save_to_repo()

    # ── Application runner ─────────────────────────────────────────────

    def run_openclaw(self):
        log_file = OPENCLAW_HOME / "workspace" / "startup.log"
        log_file.parent.mkdir(parents=True, exist_ok=True)

        # Debug: check if app directory exists
        if not Path(APP_DIR).exists():
            print(f"[SYNC] ERROR: App directory does not exist: {APP_DIR}")
            return None

        # Debug: check entry point (dist/entry.js or openclaw.mjs)
        entry_js = Path(APP_DIR) / "dist" / "entry.js"
        openclaw_mjs = Path(APP_DIR) / "openclaw.mjs"
        if entry_js.exists():
            entry_cmd = ["node", "dist/entry.js", "gateway"]
        elif openclaw_mjs.exists():
            entry_cmd = ["node", "openclaw.mjs", "gateway", "--allow-unconfigured"]
        else:
            print(f"[SYNC] ERROR: No entry point found in {APP_DIR}")
            print(f"[SYNC]   Checked: dist/entry.js, openclaw.mjs")
            # List what's actually there
            try:
                print(f"[SYNC]   Contents: {list(Path(APP_DIR).iterdir())[:20]}")
            except: pass
            return None

        # Use subprocess.run with direct output, no shell pipe
        print(f"[SYNC] Launching: {' '.join(entry_cmd)}")
        print(f"[SYNC] Working directory: {APP_DIR}")
        print(f"[SYNC] Log file: {log_file}")

        # Open log file
        log_fh = open(log_file, "a")

        # Prepare environment (all API keys passed through for OpenClaw)
        env = os.environ.copy()
        if OPENAI_API_KEY:
            env["OPENAI_API_KEY"] = OPENAI_API_KEY
            env["OPENAI_BASE_URL"] = OPENAI_BASE_URL
        if OPENROUTER_API_KEY:
            env["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
        if ZHIPU_API_KEY:
            env["ZHIPU_API_KEY"] = ZHIPU_API_KEY
        if not OPENAI_API_KEY and not OPENROUTER_API_KEY and not ZHIPU_API_KEY:
            print(f"[SYNC] WARNING: No API key set (OPENAI/OPENROUTER/ZHIPU), LLM features may not work")

        # ── Telegram API base probe ──────────────────────────────────────
        # Determine working Telegram API endpoint and set env var for
        # telegram-proxy.cjs to intercept fetch() calls.
        if TELEGRAM_API_BASE:
            tg_root = TELEGRAM_API_BASE.rstrip("/")
            print(f"[TELEGRAM] Using user-specified API base: {tg_root}")
        else:
            print("[TELEGRAM] Probing Telegram API endpoints...")
            tg_root = probe_telegram_api()

        if tg_root and tg_root != "https://api.telegram.org":
            env["TELEGRAM_API_ROOT"] = tg_root
            print(f"[TELEGRAM] Set TELEGRAM_API_ROOT={tg_root}")
            print(f"[TELEGRAM] telegram-proxy.cjs will redirect fetch() calls")
        elif tg_root:
            print("[TELEGRAM] Official API reachable β€” no proxy needed")
        else:
            print("[TELEGRAM] No reachable endpoint found β€” Telegram will not work")
        try:
            # Use Popen without shell to avoid pipe issues
            # auth disabled in config β€” no token needed
            process = subprocess.Popen(
                entry_cmd,
                cwd=str(APP_DIR),
                stdout=subprocess.PIPE,  # Capture so we can log it
                stderr=subprocess.STDOUT,
                text=True,
                bufsize=1,  # Line buffered
                env=env  # Pass environment with OPENROUTER_API_KEY
            )

            # Create a thread to copy output to log file; only print key lines to console
            def copy_output():
                try:
                    for line in process.stdout:
                        log_fh.write(line)
                        log_fh.flush()
                        # Only forward important lines to console (errors, warnings, startup)
                        # Skip noisy download/progress lines that flood the HF Spaces log viewer
                        stripped = line.strip()
                        if not stripped:
                            continue
                        # Skip progress bars and download noise
                        if any(skip in stripped for skip in [
                            'Downloading', 'Fetching', '%|', '━', '───',
                            'Already cached', 'Using cache', 'tokenizer',
                            '.safetensors', 'model-', 'shard',
                        ]):
                            continue
                        print(line, end='')
                except Exception as e:
                    print(f"[SYNC] Output copy error: {e}")
                finally:
                    log_fh.close()

            thread = threading.Thread(target=copy_output, daemon=True)
            thread.start()

            print(f"[SYNC] Process started with PID: {process.pid}")
            return process

        except Exception as e:
            log_fh.close()
            print(f"[SYNC] ERROR: Failed to start process: {e}")
            traceback.print_exc()
            return None

# ── Main ────────────────────────────────────────────────────────────────────

def main():
    try:
        t_main_start = time.time()

        t0 = time.time()
        sync = OpenClawFullSync()
        print(f"[TIMER] sync_hf init: {time.time() - t0:.1f}s")

        # 1. Restore
        t0 = time.time()
        sync.load_from_repo()
        print(f"[TIMER] load_from_repo (restore): {time.time() - t0:.1f}s")

        # 2. Background sync
        stop_event = threading.Event()
        t = threading.Thread(target=sync.background_sync_loop, args=(stop_event,), daemon=True)
        t.start()

        # 3. Start application
        t0 = time.time()
        process = sync.run_openclaw()
        print(f"[TIMER] run_openclaw launch: {time.time() - t0:.1f}s")
        print(f"[TIMER] Total startup (init β†’ app launched): {time.time() - t_main_start:.1f}s")

        # Signal handler
        def handle_signal(sig, frame):
            print(f"\n[SYNC] Signal {sig} received. Shutting down...")
            stop_event.set()
            # Wait for background sync to finish if it's running
            t.join(timeout=10)
            if process:
                process.terminate()
                try:
                    process.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    process.kill()
            print("[SYNC] Final sync...")
            sync.save_to_repo()
            sys.exit(0)

        signal.signal(signal.SIGINT, handle_signal)
        signal.signal(signal.SIGTERM, handle_signal)

        # Wait
        if process is None:
            print("[SYNC] ERROR: Failed to start OpenClaw process. Exiting.")
            stop_event.set()
            t.join(timeout=5)
            sys.exit(1)

        exit_code = process.wait()
        print(f"[SYNC] OpenClaw exited with code {exit_code}")
        stop_event.set()
        t.join(timeout=10)
        print("[SYNC] Final sync...")
        sync.save_to_repo()
        sys.exit(exit_code)

    except Exception as e:
        print(f"[SYNC] FATAL ERROR in main: {e}")
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()