File size: 11,554 Bytes
5365372
 
 
 
 
 
 
 
 
 
 
 
 
969345a
5365372
 
 
84bc607
 
eec7304
 
969345a
eec7304
 
 
 
 
 
 
969345a
 
 
eec7304
7cd1716
5365372
 
 
51ec4bc
bcbf1ad
5365372
 
 
 
 
 
 
 
 
 
 
969345a
 
 
 
 
 
 
 
 
 
 
 
 
 
eec7304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969345a
bcbf1ad
 
 
969345a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365372
 
 
 
 
 
 
 
 
 
 
 
51ec4bc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365372
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eec7304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365372
eec7304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5365372
 
 
 
 
7cd1716
 
 
 
 
 
ea9c8a5
 
 
 
 
 
7cd1716
 
5365372
 
 
 
 
 
eec7304
 
5365372
 
 
 
 
eec7304
5365372
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
#!/usr/bin/env python3
"""
HuggingClaw Workspace Sync — HuggingFace Hub based backup
Uses huggingface_hub Python library instead of git for more reliable
HF Dataset operations (handles auth, LFS, retries automatically).

Falls back to git-based sync if HF_USERNAME or HF_TOKEN are not set.
"""

import os
import sys
import time
import signal
import shutil
import subprocess
from pathlib import Path

# Silence huggingface_hub progress bars unless the operator explicitly set the flag.
os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")


def _env_int(name: str, default: int, minimum: int = 0) -> int:
    """Read an integer setting from the environment, tolerating bad values.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset, blank, not an
            integer, or below *minimum*.
        minimum: Smallest accepted value.

    Returns:
        The parsed integer, or *default* when the value is unusable.
    """
    raw = os.environ.get(name, "").strip()
    if not raw:
        return default
    try:
        value = int(raw)
    except ValueError:
        # A typo in the environment should not crash the sync process at import.
        print(f"  ⚠️ Ignoring invalid {name}={raw!r}; using {default}.")
        return default
    if value < minimum:
        print(f"  ⚠️ Ignoring out-of-range {name}={value}; using {default}.")
        return default
    return value


# Live OpenClaw home directory and the workspace folder that gets synced.
OPENCLAW_HOME = Path("/home/node/.openclaw")
WORKSPACE = OPENCLAW_HOME / "workspace"
# Hidden folder inside the workspace that holds mirrored state.
STATE_DIR = WORKSPACE / ".huggingclaw-state"
OPENCLAW_STATE_BACKUP_DIR = STATE_DIR / "openclaw"
# Top-level entries of OPENCLAW_HOME that are skipped when mirroring state.
EXCLUDED_STATE_NAMES = {
    "workspace",
    "openclaw-app",
    "gateway.log",
    "browser",
}
# Live WhatsApp credentials and the location of their mirrored copy.
WHATSAPP_CREDS_DIR = Path("/home/node/.openclaw/credentials/whatsapp/default")
WHATSAPP_BACKUP_DIR = STATE_DIR / "credentials" / "whatsapp" / "default"
# Marker file in the workspace that requests removal of backed-up WhatsApp credentials.
RESET_MARKER = WORKSPACE / ".reset_credentials"
# Seconds between sync passes (at least 1) and delay before the first pass.
INTERVAL = _env_int("SYNC_INTERVAL", 180, minimum=1)
INITIAL_DELAY = _env_int("SYNC_START_DELAY", 10)
HF_TOKEN = os.environ.get("HF_TOKEN", "")
HF_USERNAME = os.environ.get("HF_USERNAME", "")
BACKUP_DATASET = os.environ.get("BACKUP_DATASET_NAME", "huggingclaw-backup")
WEBHOOK_URL = os.environ.get("WEBHOOK_URL", "")
# Only the exact string "true" (case-insensitive, surrounding whitespace ignored) enables this.
WHATSAPP_ENABLED = os.environ.get("WHATSAPP_ENABLED", "").strip().lower() == "true"

# Cleared by the signal handler so the main loop can stop.
running = True


def signal_handler(sig, frame):
    """Record that termination was requested by clearing the ``running`` flag."""
    global running
    running = False


# The same handler serves both termination signals.
for _signum in (signal.SIGTERM, signal.SIGINT):
    signal.signal(_signum, signal_handler)


def count_files(path: Path) -> int:
    """Return how many regular files live anywhere below *path*.

    A path that does not exist counts as zero files.
    """
    if not path.exists():
        return 0

    total = 0
    for entry in path.rglob("*"):
        if entry.is_file():
            total += 1
    return total


def _snapshot_openclaw_state() -> None:
    """Rebuild the mirror of OPENCLAW_HOME under OPENCLAW_STATE_BACKUP_DIR.

    Entries named in EXCLUDED_STATE_NAMES are skipped. Each entry is copied
    independently so that one unreadable entry does not prevent the
    remaining entries from being mirrored.
    """
    try:
        STATE_DIR.mkdir(parents=True, exist_ok=True)
        # Start from an empty mirror so deleted source entries do not linger.
        if OPENCLAW_STATE_BACKUP_DIR.exists():
            shutil.rmtree(OPENCLAW_STATE_BACKUP_DIR, ignore_errors=True)
        OPENCLAW_STATE_BACKUP_DIR.mkdir(parents=True, exist_ok=True)
        entries = [
            source_path
            for source_path in OPENCLAW_HOME.iterdir()
            if source_path.name not in EXCLUDED_STATE_NAMES
        ]
    except Exception as e:
        print(f"  ⚠️ Could not snapshot OpenClaw state: {e}")
        return

    for source_path in entries:
        backup_path = OPENCLAW_STATE_BACKUP_DIR / source_path.name
        try:
            if source_path.is_dir():
                shutil.copytree(source_path, backup_path)
            elif source_path.is_file():
                shutil.copy2(source_path, backup_path)
            # Anything else (e.g. a dangling symlink) is skipped.
        except Exception as e:
            # Best effort: report this entry and keep mirroring the others.
            print(f"  ⚠️ Could not snapshot OpenClaw state ({source_path.name}): {e}")


def _snapshot_whatsapp_state() -> None:
    """Mirror WhatsApp credentials into WHATSAPP_BACKUP_DIR, honoring reset requests."""
    try:
        if not WHATSAPP_ENABLED:
            return

        STATE_DIR.mkdir(parents=True, exist_ok=True)

        if RESET_MARKER.exists():
            # A reset request drops the backed-up credentials and consumes the marker.
            if WHATSAPP_BACKUP_DIR.exists():
                shutil.rmtree(WHATSAPP_BACKUP_DIR, ignore_errors=True)
                print("🧹 Removed backed-up WhatsApp credentials after reset request.")
            RESET_MARKER.unlink(missing_ok=True)
            return

        if not WHATSAPP_CREDS_DIR.exists():
            return

        file_count = count_files(WHATSAPP_CREDS_DIR)
        if file_count < 2:
            # Fewer than two files is treated as an incomplete credential set;
            # the existing backup is left untouched.
            if file_count > 0:
                print(f"πŸ“¦ WhatsApp backup skipped: credentials incomplete ({file_count} files).")
            return

        WHATSAPP_BACKUP_DIR.parent.mkdir(parents=True, exist_ok=True)
        if WHATSAPP_BACKUP_DIR.exists():
            shutil.rmtree(WHATSAPP_BACKUP_DIR, ignore_errors=True)
        shutil.copytree(WHATSAPP_CREDS_DIR, WHATSAPP_BACKUP_DIR)
    except Exception as e:
        print(f"  ⚠️ Could not snapshot WhatsApp state: {e}")


def snapshot_state_into_workspace() -> None:
    """
    Mirror persistent state into the workspace-backed dataset repo.

    Two independent, best-effort steps run in order:

    1. Every entry of the OpenClaw home directory, except those named in
       EXCLUDED_STATE_NAMES, is copied into a hidden state folder inside the
       workspace.
    2. When WhatsApp is enabled, its credentials are copied into the same
       hidden folder, without changing the live credentials location.

    Failures are printed and never raised; a failure in the first step does
    not prevent the second from running.
    """
    _snapshot_openclaw_state()
    _snapshot_whatsapp_state()


def has_changes():
    """Report whether the workspace has anything new to sync (git-based check).

    Stages everything with ``git add -A`` and then runs
    ``git diff --cached --quiet``; a nonzero exit status from the diff is
    treated as "changes present". Any exception while running git yields
    False.
    """
    def run_git(*args):
        return subprocess.run(["git", *args], cwd=WORKSPACE, capture_output=True)

    try:
        run_git("add", "-A")
        staged_diff = run_git("diff", "--cached", "--quiet")
    except Exception:
        return False
    return staged_diff.returncode != 0

def write_sync_status(status, message=""):
    """Write sync status to file for the health server dashboard.

    The status is stored as JSON in /tmp/sync-status.json with the keys
    "status", "timestamp" (UTC, formatted as YYYY-MM-DDTHH:MM:SSZ) and
    "message". The file is written to a temporary sibling first and then
    moved into place, so a reader never sees a truncated or half-written
    document.

    Args:
        status: Short state label stored under "status".
        message: Optional human-readable detail stored under "message".

    Failures are printed and never raised.
    """
    status_path = "/tmp/sync-status.json"
    # Include the PID so concurrent writers never share a temporary file.
    tmp_path = f"{status_path}.{os.getpid()}.tmp"
    try:
        import json
        data = {
            "status": status,
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
            "message": message
        }
        with open(tmp_path, "w") as f:
            json.dump(data, f)
        # os.replace swaps the file in a single step on the same filesystem.
        os.replace(tmp_path, status_path)
    except Exception as e:
        print(f"  ⚠️ Could not write sync status: {e}")
        try:
            os.unlink(tmp_path)
        except OSError:
            # Nothing to clean up (the temp file was never created or is already gone).
            pass

def trigger_webhook(event, status, message):
    """Trigger webhook notification.

    Does nothing when WEBHOOK_URL is empty. Otherwise sends a JSON body with
    the keys "event", "status", "message" and "timestamp" (UTC) to
    WEBHOOK_URL with a 10 second timeout.

    Args:
        event: Event name stored under "event".
        status: Status label stored under "status".
        message: Human-readable detail stored under "message".

    Delivery failures are printed and never raised.
    """
    if not WEBHOOK_URL:
        return
    try:
        import urllib.request
        import json
        data = json.dumps({
            "event": event,
            "status": status,
            "message": message,
            "timestamp": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        }).encode('utf-8')
        req = urllib.request.Request(WEBHOOK_URL, data=data, headers={'Content-Type': 'application/json'})
        # Close the response explicitly so the connection is not leaked.
        with urllib.request.urlopen(req, timeout=10):
            pass
    except Exception as e:
        print(f"  ⚠️ Webhook delivery failed: {e}")

def sync_with_hf_hub():
    """Upload the workspace folder to the backup dataset via huggingface_hub.

    The dataset ``{HF_USERNAME}/{BACKUP_DATASET}`` is looked up first; when
    the lookup raises, a private dataset with that id is created. The
    workspace is then uploaded, skipping the ``.git`` folder.

    Returns:
        True when the upload call completed, False when the library is not
        installed, the dataset could not be created, or anything else raised.
    """
    try:
        from huggingface_hub import HfApi, upload_folder

        repo_id = f"{HF_USERNAME}/{BACKUP_DATASET}"
        api = HfApi(token=HF_TOKEN)

        # Probe for the dataset; any failure is treated as "needs creating".
        try:
            api.repo_info(repo_id=repo_id, repo_type="dataset")
            needs_create = False
        except Exception:
            needs_create = True

        if needs_create:
            print(f"  πŸ“ Creating dataset {repo_id}...")
            try:
                api.create_repo(repo_id=repo_id, repo_type="dataset", private=True)
                print(f"  βœ… Dataset created: {repo_id}")
            except Exception as e:
                print(f"  ⚠️  Could not create dataset: {e}")
                return False

        # Push the whole workspace as a single timestamped commit.
        upload_kwargs = {
            "folder_path": str(WORKSPACE),
            "repo_id": repo_id,
            "repo_type": "dataset",
            "token": HF_TOKEN,
            "commit_message": f"Auto-sync {time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())}",
            "ignore_patterns": [".git/*", ".git"],
        }
        upload_folder(**upload_kwargs)
        return True

    except ImportError:
        print("  ⚠️  huggingface_hub not installed, falling back to git")
        return False
    except Exception as e:
        print(f"  ⚠️  HF Hub sync failed: {e}")
        return False


def sync_with_git():
    """Fallback: stage, commit and push the workspace with git.

    Runs ``git add -A``, a timestamped ``git commit`` and
    ``git push origin main`` in the workspace, in that order. Only the exit
    status of the push decides the result.

    Returns:
        True when the push exited with status 0, False otherwise or when
        running git raised.
    """
    try:
        ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
        git_steps = (
            ["git", "add", "-A"],
            ["git", "commit", "-m", f"Auto-sync {ts}"],
            ["git", "push", "origin", "main"],
        )
        last = None
        for argv in git_steps:
            # Earlier steps are fire-and-forget; only the final push is inspected.
            last = subprocess.run(argv, cwd=WORKSPACE, capture_output=True)
        return last.returncode == 0
    except Exception:
        return False


def run_sync_pass(use_hf_hub: bool) -> None:
    """Run one backup cycle: snapshot state, then push if something changed.

    Args:
        use_hf_hub: When True, try the HuggingFace Hub upload first and fall
            back to git; when False, use git only.

    The outcome is written to the sync status file. A failed push also
    triggers the webhook.
    """
    snapshot_state_into_workspace()
    if not has_changes():
        return

    ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    write_sync_status("syncing", f"Starting sync at {ts}")

    if not use_hf_hub:
        # Git-only mode.
        if sync_with_git():
            print(f"πŸ”„ Workspace sync (git): pushed changes ({ts})")
            write_sync_status("success", "Successfully pushed via git")
        else:
            msg = f"Workspace sync: push failed ({ts}), will retry"
            print(f"πŸ”„ {msg}")
            write_sync_status("error", msg)
            trigger_webhook("sync", "error", msg)
        return

    # Hub mode: try the hub upload first, then git, then report failure.
    if sync_with_hf_hub():
        print(f"πŸ”„ Workspace sync (hf_hub): pushed changes ({ts})")
        write_sync_status("success", "Successfully pushed to HF Hub")
    elif sync_with_git():
        print(f"πŸ”„ Workspace sync (git fallback): pushed changes ({ts})")
        write_sync_status("success", "Successfully pushed via git fallback")
    else:
        msg = f"Workspace sync: failed ({ts}), will retry"
        print(f"πŸ”„ {msg}")
        write_sync_status("error", msg)
        trigger_webhook("sync", "error", msg)


def _sleep_while_running(seconds, step=1.0):
    """Sleep for up to *seconds*, returning early once ``running`` is cleared.

    ``time.sleep`` resumes sleeping after a signal handler returns, so a
    single long sleep would delay shutdown by up to the whole duration.
    Sleeping in short slices lets a stop request be noticed within about
    *step* seconds.

    Args:
        seconds: Total time to wait; must not be negative.
        step: Longest single sleep slice, in seconds.

    Returns:
        True when the full duration elapsed, False when a stop was requested.

    Raises:
        ValueError: If *seconds* is negative (mirrors ``time.sleep``).
    """
    if seconds < 0:
        raise ValueError("sleep length must be non-negative")
    deadline = time.monotonic() + seconds
    while running:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            return True
        time.sleep(min(step, remaining))
    return False


def _run_shutdown_sync(use_hf_hub):
    """Perform the single ``--sync-once`` pass and record its outcome."""
    snapshot_state_into_workspace()

    if not has_changes():
        print("πŸ“ Workspace sync: no changes to persist.")
        write_sync_status("configured", "No new state changes to sync.")
        return

    ts = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
    write_sync_status("syncing", f"Shutdown sync started at {ts}")

    if use_hf_hub:
        if sync_with_hf_hub():
            print(f"πŸ”„ Workspace sync (hf_hub): pushed changes ({ts})")
            write_sync_status("success", "Shutdown sync pushed to HF Hub")
            return
        if sync_with_git():
            print(f"πŸ”„ Workspace sync (git fallback): pushed changes ({ts})")
            write_sync_status("success", "Shutdown sync pushed via git fallback")
            return
    elif sync_with_git():
        print(f"πŸ”„ Workspace sync (git): pushed changes ({ts})")
        write_sync_status("success", "Shutdown sync pushed via git")
        return

    # Every available transport failed.
    write_sync_status("error", "Shutdown sync failed")
    print("πŸ“ Workspace sync: shutdown sync failed.")


def main():
    """Entry point for the workspace sync process.

    Modes, selected from ``sys.argv``:

    * ``--snapshot-once``: refresh the state snapshot, record status, exit.
    * ``--sync-once``: snapshot and push a single time, then exit.
    * no flag: wait INITIAL_DELAY seconds, sync once, then sync every
      INTERVAL seconds until a stop signal clears ``running``.

    Both syncing modes exit early when the workspace is missing, or when
    neither HuggingFace credentials nor a git repository are available.
    A stop signal received while waiting ends the process without running
    another sync pass.
    """
    if "--snapshot-once" in sys.argv:
        snapshot_state_into_workspace()
        write_sync_status("configured", "State snapshot refreshed during shutdown.")
        return

    if not WORKSPACE.exists():
        print("πŸ“ Workspace sync: workspace not found, exiting.")
        return

    use_hf_hub = bool(HF_TOKEN and HF_USERNAME)
    git_dir = WORKSPACE / ".git"

    if not use_hf_hub and not git_dir.exists():
        print("πŸ“ Workspace sync: no git repo and no HF credentials, skipping.")
        return

    if "--sync-once" in sys.argv:
        _run_shutdown_sync(use_hf_hub)
        return

    if use_hf_hub:
        write_sync_status("configured", f"Backup enabled. Waiting for next sync in {INTERVAL}s.")
    else:
        write_sync_status("configured", f"Git sync enabled. Waiting for next sync in {INTERVAL}s.")

    # Give the gateway a short head start before the first sync probe.
    if not _sleep_while_running(INITIAL_DELAY):
        return

    if use_hf_hub:
        print(f"πŸ”„ Workspace sync started (huggingface_hub): every {INTERVAL}s β†’ {HF_USERNAME}/{BACKUP_DATASET}")
    else:
        print(f"πŸ”„ Workspace sync started (git): every {INTERVAL}s")

    run_sync_pass(use_hf_hub)

    # Keep syncing until a stop signal interrupts the wait.
    while _sleep_while_running(INTERVAL):
        run_sync_pass(use_hf_hub)


if __name__ == "__main__":
    main()