import os
import shutil
import sys
import tarfile
import tempfile
import time
from datetime import datetime

from huggingface_hub import HfApi

# Number of most-recent backups retained in the dataset repo.
MAX_BACKUPS = 5


def _whatsapp_creds_incomplete(state_dir: str) -> bool:
    """Return True when the WhatsApp credential dir exists but looks empty/broken.

    Basic sanity check: a healthy login has at least creds.json + keys, so
    fewer than 2 regular files means we should not overwrite a good backup
    with a broken state. (Threshold lowered from 10 to 2 to be less
    aggressive while still catching empty states.)
    """
    wa_creds_dir = os.path.join(state_dir, "credentials", "whatsapp", "default")
    if not os.path.isdir(wa_creds_dir):
        # No credential dir at all: nothing to validate, allow the backup.
        return False
    file_count = len(
        [f for f in os.listdir(wa_creds_dir) if os.path.isfile(os.path.join(wa_creds_dir, f))]
    )
    if file_count < 2:
        print(
            f"[save_to_dataset] Skip: WhatsApp credentials incomplete ({file_count} files).",
            file=sys.stderr,
        )
        return True
    return False


def _sync_system_logs(state_dir: str) -> None:
    """Best-effort copy of /home/node/logs into the state dir so logs persist.

    Failures are logged and swallowed on purpose: losing log sync must never
    block the state backup itself.
    """
    try:
        sys_log_path = "/home/node/logs"
        backup_log_path = os.path.join(state_dir, "logs/sys_logs")
        if os.path.exists(sys_log_path):
            # copytree requires the destination not to exist; replace wholesale.
            if os.path.exists(backup_log_path):
                shutil.rmtree(backup_log_path)
            shutil.copytree(sys_log_path, backup_log_path, ignore_dangling_symlinks=True)
            print(f"[save_to_dataset] Synced logs from {sys_log_path} to {backup_log_path}")
    except Exception as e:
        print(f"[save_to_dataset] Warning: Failed to sync logs: {e}")


def _compress_state(state_dir: str, tar_path: str) -> None:
    """Write a gzipped tar of *state_dir* to *tar_path*, excluding *.lock files.

    Raises whatever tarfile/OS error occurs; the caller decides how to report it.
    """

    def exclude_filter(info: tarfile.TarInfo) -> tarfile.TarInfo | None:
        # Drop lock files: they are transient and can change mid-archive.
        return None if info.name.endswith(".lock") else info

    with tarfile.open(tar_path, "w:gz") as tf:
        tf.add(state_dir, arcname=".", filter=exclude_filter)


def _rotate_backups(api: HfApi, repo_id: str, token: str, keep: int = MAX_BACKUPS) -> None:
    """Delete the oldest backups in the dataset repo, keeping the last *keep*.

    Non-fatal: any error is reported to stderr and ignored, since the new
    backup has already been uploaded by the time rotation runs.
    """
    try:
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        # Match both .tar and .tar.gz for backward compatibility during transition.
        # Lexical sort works because names embed a zero-padded YYYYMMDD_HHMMSS stamp.
        backups = sorted(
            f
            for f in files
            if f.startswith("state/backup-") and (f.endswith(".tar") or f.endswith(".tar.gz"))
        )
        if len(backups) > keep:
            to_delete = backups[:-keep]
            print(f"[save_to_dataset] Rotating backups, deleting: {to_delete}")
            for old_backup in to_delete:
                api.delete_file(
                    path_in_repo=old_backup,
                    repo_id=repo_id,
                    repo_type="dataset",
                    token=token,
                )
    except Exception as e:
        print(f"[save_to_dataset] Rotation failed (non-fatal): {e}", file=sys.stderr)


def main() -> None:
    """
    Backs up ~/.openclaw to Hugging Face Dataset with rolling history.
    Keeps the last 5 backups to prevent data loss from corruption.

    Env vars:
    - HF_TOKEN
    - OPENCLAW_DATASET_REPO
    """
    repo_id = os.environ.get("OPENCLAW_DATASET_REPO")
    token = os.environ.get("HF_TOKEN")

    state_dir = os.path.expanduser("~/.openclaw")

    if not repo_id or not token:
        print("[save_to_dataset] Missing configuration.", file=sys.stderr)
        return

    if not os.path.isdir(state_dir):
        print("[save_to_dataset] No state to save.", file=sys.stderr)
        return

    # 1. Validation: Ensure we have valid credentials before backing up.
    if _whatsapp_creds_incomplete(state_dir):
        return

    api = HfApi(token=token)

    # Sync system logs to state dir for persistence (best-effort).
    _sync_system_logs(state_dir)

    # Informational only: report whether the primary credential file exists.
    creds_path = os.path.join(state_dir, "credentials/whatsapp/default/auth_info_multi.json")
    if os.path.exists(creds_path):
        print(f"[save_to_dataset] ✅ WhatsApp credentials found at {creds_path}")
    else:
        print(f"[save_to_dataset] ⚠️  WhatsApp credentials NOT found (user might need to login)")

    # Generate a timestamped repo path so each backup is a distinct file.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"state/backup-{timestamp}.tar.gz"

    with tempfile.TemporaryDirectory() as tmpdir:
        tar_path = os.path.join(tmpdir, "openclaw.tar.gz")

        try:
            _compress_state(state_dir, tar_path)
        except Exception as e:
            print(f"[save_to_dataset] Failed to compress: {e}", file=sys.stderr)
            return

        # FIX: previously printed a garbled placeholder instead of the target name.
        print(f"[save_to_dataset] Uploading backup: {filename}")
        try:
            api.upload_file(
                path_or_fileobj=tar_path,
                path_in_repo=filename,
                repo_id=repo_id,
                repo_type="dataset",
            )
        except Exception as e:
            print(f"[save_to_dataset] Upload failed: {e}", file=sys.stderr)
            return

    # 2. Rotation: Delete old backups, keep the most recent MAX_BACKUPS.
    _rotate_backups(api, repo_id, token)