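"""Sync a local data directory with a Hugging Face dataset repository.

Usage:
    python sync_data.py download     # restore the latest backup into DATA_DIR
    python sync_data.py upload_loop  # periodically archive DATA_DIR and upload it
"""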
import os
import sys
import shutil
import tarfile
import time
import signal
from pathlib import Path
from datetime import datetime
from huggingface_hub import HfApi, hf_hub_download, list_repo_files

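# Configuration: every value below can be overridden via an environment variable.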
DATA_DIR = Path(os.environ.get("DATA_DIR", "/root/.openclaw"))
DATASET_REPO_ID = os.environ.get("DATASET_REPO_ID", "your-username/your-dataset")
SYNC_INTERVAL = int(os.environ.get("SYNC_INTERVAL", 300))
HF_TOKEN = os.environ.get("HF_TOKEN")
ARCHIVE_NAME = "openclaw_backup.tar.gz"
EXCLUDE_FILES = {"openclaw.json"}

api = HfApi(token=HF_TOKEN)
running = True  # set to False by signal_handler to stop the upload loop


def signal_handler(signum, frame):
    """Stop the upload loop and flush one final backup before the process exits."""
    global running
    running = False
    print(f"[{datetime.now()}] Received signal {signum}, uploading before exit...")
    upload_to_dataset()
    sys.exit(0)


def download_from_dataset():
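    """Restore the most recent backup archive from the dataset repo, if one exists."""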
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    try:
        files = list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset", token=HF_TOKEN)
        if ARCHIVE_NAME in files:
            print(f"[{datetime.now()}] Downloading {ARCHIVE_NAME}...")
            archive_path = hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=ARCHIVE_NAME,
                repo_type="dataset",
                local_dir="/tmp",
                token=HF_TOKEN
            )
            shutil.unpack_archive(archive_path, DATA_DIR)
            os.remove(archive_path)
            print(f"[{datetime.now()}] Data restored to {DATA_DIR}")
        else:
            print(f"[{datetime.now()}] No backup found, starting fresh")
    except Exception as e:
        print(f"[{datetime.now()}] Download failed: {e}")


def upload_to_dataset():
    """Archive DATA_DIR (minus EXCLUDE_FILES) and push it to the dataset repo."""
    if not DATA_DIR.exists():
        print(f"[{datetime.now()}] {DATA_DIR} does not exist, nothing to upload")
        return
    files_to_backup = [f for f in DATA_DIR.iterdir() if f.name not in EXCLUDE_FILES]
    if not files_to_backup:
        print(f"[{datetime.now()}] No files to upload")
        return
    try:
        archive_path = Path("/tmp") / ARCHIVE_NAME
        with tarfile.open(archive_path, "w:gz") as tar:
            for file_path in files_to_backup:
                tar.add(file_path, arcname=file_path.name)
        print(f"[{datetime.now()}] Uploading {ARCHIVE_NAME}...")
        api.upload_file(
            path_or_fileobj=str(archive_path),
            path_in_repo=ARCHIVE_NAME,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
        )
        os.remove(archive_path)
        print(f"[{datetime.now()}] Upload completed")
    except Exception as e:
        print(f"[{datetime.now()}] Upload failed: {e}")


def upload_loop():
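    """Upload a fresh backup every SYNC_INTERVAL seconds until signalled to stop."""
    # SIGTERM/SIGINT trigger one final upload via signal_handler before exiting.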
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)
    while running:
        time.sleep(SYNC_INTERVAL)
        upload_to_dataset()


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python sync_data.py [download|upload_loop]")
        sys.exit(1)

    cmd = sys.argv[1]
    print(f"Dataset: {DATASET_REPO_ID}")
    print(f"Data dir: {DATA_DIR}")

    if cmd == "download":
        download_from_dataset()
    elif cmd == "upload_loop":
        print(f"Sync interval: {SYNC_INTERVAL}s")
        upload_loop()
    else:
        print(f"Unknown command: {cmd}")
        sys.exit(1)