1u committed on
Commit
4912a3d
·
verified ·
1 Parent(s): 3901e53

Upload sync_data.py

Browse files
Files changed (1) hide show
  1. sync_data.py +83 -0
sync_data.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+ import time
4
+ import threading
5
+ from pathlib import Path
6
+ from datetime import datetime
7
+ from huggingface_hub import HfApi, hf_hub_download, list_repo_files
8
+
9
# Directory that backups are unpacked into and archived from; override with
# the DATA_DIR environment variable.
DATA_DIR = Path(os.getenv("DATA_DIR", "/root/.openclaw"))

# Hugging Face dataset repo that stores the backup archive. The default looks
# like a placeholder -- presumably DATASET_REPO_ID must be set in practice.
DATASET_REPO_ID = os.getenv("DATASET_REPO_ID", "your-username/your-dataset")

# Seconds the sync loop sleeps between two uploads.
SYNC_INTERVAL = int(os.getenv("SYNC_INTERVAL", 300))

# File name of the backup archive, both in the repo and under /tmp.
ARCHIVE_NAME = "openclaw_backup.tar.gz"

# Hub client used for uploading the archive.
api = HfApi()
15
+
16
+
17
def download_from_dataset():
    """Restore DATA_DIR from the backup archive stored in the dataset repo.

    Creates DATA_DIR if it does not exist, then lists the files of
    DATASET_REPO_ID. If ARCHIVE_NAME is among them, the archive is downloaded
    into /tmp and unpacked into DATA_DIR; otherwise a "starting fresh"
    message is printed and nothing else happens.

    This is best-effort: any error while listing, downloading or unpacking is
    printed and swallowed. The downloaded archive is removed from /tmp whether
    or not unpacking succeeded.
    """
    DATA_DIR.mkdir(parents=True, exist_ok=True)
    archive_path = None
    try:
        files = list_repo_files(repo_id=DATASET_REPO_ID, repo_type="dataset")
        if ARCHIVE_NAME not in files:
            print(f"[{datetime.now()}] No backup found, starting fresh")
            return

        print(f"[{datetime.now()}] Downloading {ARCHIVE_NAME}...")
        archive_path = hf_hub_download(
            repo_id=DATASET_REPO_ID,
            filename=ARCHIVE_NAME,
            repo_type="dataset",
            local_dir="/tmp",
        )
        # NOTE(review): unpack_archive extracts whatever member paths the
        # archive contains. If the dataset repo is not fully trusted, consider
        # passing filter="data" (available on Python 3.12+).
        shutil.unpack_archive(archive_path, DATA_DIR)
        print(f"[{datetime.now()}] Data restored to {DATA_DIR}")
    except Exception as e:
        print(f"[{datetime.now()}] Download failed: {e}")
    finally:
        # Clean up even when unpacking failed, so a broken archive does not
        # linger in /tmp; a cleanup error must not be reported as a failed
        # download.
        if archive_path is not None:
            try:
                os.remove(archive_path)
            except OSError as e:
                print(f"[{datetime.now()}] Could not remove {archive_path}: {e}")
36
+
37
+
38
def upload_to_dataset():
    """Archive DATA_DIR and upload it to the dataset repo as ARCHIVE_NAME.

    If DATA_DIR is missing or empty, prints "No files to upload" and returns.
    Otherwise DATA_DIR is packed into a gzip-compressed tar archive under
    /tmp and uploaded to DATASET_REPO_ID at path ARCHIVE_NAME.

    This is best-effort: any error is printed and swallowed, so the function
    never raises on ordinary failures and is safe to call from a long-running
    loop. The temporary archive is removed whether or not the upload
    succeeded.
    """
    archive_path = Path("/tmp") / ARCHIVE_NAME
    try:
        # The emptiness check lives inside the try: iterdir() raises if
        # DATA_DIR has disappeared, and that must not escape to the caller.
        if not DATA_DIR.is_dir() or not any(DATA_DIR.iterdir()):
            print(f"[{datetime.now()}] No files to upload")
            return

        # make_archive appends ".tar.gz" itself, so strip both suffixes from
        # the target path to obtain the base name.
        shutil.make_archive(
            str(archive_path.with_suffix("").with_suffix("")),
            "gztar",
            DATA_DIR,
        )
        print(f"[{datetime.now()}] Uploading {ARCHIVE_NAME}...")
        api.upload_file(
            path_or_fileobj=str(archive_path),
            path_in_repo=ARCHIVE_NAME,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
        )
        print(f"[{datetime.now()}] Upload completed")
    except Exception as e:
        print(f"[{datetime.now()}] Upload failed: {e}")
    finally:
        # Remove the temporary archive on every path (including a failed
        # upload); a cleanup error must not be reported as a failed upload.
        if archive_path.exists():
            try:
                os.remove(archive_path)
            except OSError as e:
                print(f"[{datetime.now()}] Could not remove {archive_path}: {e}")
60
+
61
+
62
def sync_loop():
    """Upload DATA_DIR to the dataset repo every SYNC_INTERVAL seconds.

    Sleeps before each upload, so the first upload happens one full interval
    after the loop starts. Never returns.

    An exception escaping an upload attempt is printed and the loop carries
    on; otherwise a single failure would end the thread running this loop
    and silently stop all future syncs.
    """
    while True:
        time.sleep(SYNC_INTERVAL)
        try:
            upload_to_dataset()
        except Exception as e:
            print(f"[{datetime.now()}] Sync iteration failed: {e}")
66
+
67
+
68
def main():
    """Print the configuration, restore the backup and run periodic uploads.

    Restores DATA_DIR once via download_from_dataset(), starts sync_loop() on
    a daemon thread, then keeps the main thread alive indefinitely by
    sleeping in one-hour steps. Never returns.
    """
    banner = (
        f"Dataset: {DATASET_REPO_ID}",
        f"Data dir: {DATA_DIR}",
        f"Sync interval: {SYNC_INTERVAL}s",
    )
    for line in banner:
        print(line)

    download_from_dataset()

    # Daemon thread: it does not keep the process alive on its own, which is
    # why the main thread parks in the sleep loop below.
    threading.Thread(target=sync_loop, daemon=True).start()

    while True:
        time.sleep(3600)


if __name__ == "__main__":
    main()