yadinae committed on
Commit
cd76221
·
verified ·
1 Parent(s): 99ec157

Create sync.py

Browse files
Files changed (1) hide show
  1. sync.py +80 -0
sync.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, time, threading, shutil
2
+ from huggingface_hub import HfApi, snapshot_download
3
+
4
# Nanobot path mapping: local directory -> folder inside the dataset repo.
NANOBOT_CONFIG = {
    "/root/.nanobot": "nanobot_settings",
    "/root/project/data": "data",
}

# Hermes data path mapping: local directory -> folder inside the dataset repo.
HERMES_CONFIG = {
    "/root/.hermes": "hermes_settings",
}

# Glob patterns passed as ``ignore_patterns`` to every upload.
IGNORE_LIST = [
    "*.db-shm",
    "*.db-wal",
    "models_dev_cache.json",
    "checkpoints/*",
    "*.pyc",
    "__pycache__",
    "hermes-agent/venv",
    "hermes-agent/.git",
    "logs/*",
    "cache/*",
    "*.pid",
    "*.lock",
    "tmp/*",
    "*.tmp",
    "__pycache__/*",
    "*.log.lock",
    "node_modules/*",
    "pkg/*",
    ".cache/*",
    ".npm/*",
    "backup/*",
]

# Target dataset repository and access token, both read from the environment.
# When either is unset, upload_all() and download_all() return immediately.
DATASET_ID = os.getenv("DATASET_ID")
HF_TOKEN = os.getenv("HF_TOKEN")

# Shared Hub client used for the folder uploads.
api = HfApi()
13
+
14
def _backup_mapping(config, banner, error_prefix):
    """Upload every non-empty directory listed in *config* to the dataset repo.

    Args:
        config: Mapping of local directory path -> destination folder inside
            the dataset repository.
        banner: Progress line printed once before the uploads start.
        error_prefix: Text printed in front of the exception when an upload
            (or the directory check preceding it) fails.
    """
    print(banner)
    for local_path, repo_path in config.items():
        try:
            # Skip missing, non-directory and empty sources.  The upload is
            # issued with delete_patterns="*", so pushing an empty directory
            # (e.g. one that download_all() created but could not fill) could
            # erase the existing remote backup instead of refreshing it.
            if not os.path.isdir(local_path) or not os.listdir(local_path):
                continue
            api.upload_folder(
                folder_path=local_path,
                path_in_repo=repo_path,
                repo_id=DATASET_ID,
                repo_type="dataset",
                token=HF_TOKEN,
                delete_patterns="*",
                ignore_patterns=IGNORE_LIST,
            )
            print(f"✅ {repo_path} 备份成功")
        except Exception as e:
            # Best effort: one failing folder must not stop the others.
            print(f"❌ {error_prefix}: {e}")


def upload_all():
    """Back up the Nanobot and Hermes directories to the dataset repository.

    Does nothing when ``DATASET_ID`` or ``HF_TOKEN`` is not configured.
    Failures are printed per folder and never raised to the caller.
    """
    if not DATASET_ID or not HF_TOKEN:
        return
    _backup_mapping(NANOBOT_CONFIG, "🚀 正在备份Nanobot数据...", "备份翻车")
    _backup_mapping(HERMES_CONFIG, "🚀 正在备份Hermes数据...", "Hermes备份翻车")
36
+
37
def _restore_mapping(config):
    """Restore every folder listed in *config* from the dataset repository.

    Args:
        config: Mapping of local directory path -> source folder inside the
            dataset repository.

    Each folder is downloaded into a scratch directory under ``/tmp`` and its
    top-level entries are then copied into the local directory, replacing
    same-named directories and overwriting same-named files.
    """
    for local_path, repo_path in config.items():
        temp_dir = f"/tmp/hf_{repo_path}"
        try:
            os.makedirs(local_path, exist_ok=True)
            snapshot_download(
                repo_id=DATASET_ID,
                repo_type="dataset",
                local_dir=temp_dir,
                allow_patterns=f"{repo_path}/*",
                token=HF_TOKEN,
                local_dir_use_symlinks=False,
            )
            source_dir = os.path.join(temp_dir, repo_path)
            if os.path.exists(source_dir):
                for item in os.listdir(source_dir):
                    src = os.path.join(source_dir, item)
                    dst = os.path.join(local_path, item)
                    if os.path.isdir(src):
                        # Replace rather than merge so files removed from the
                        # backup do not linger locally.
                        if os.path.exists(dst):
                            shutil.rmtree(dst)
                        shutil.copytree(src, dst)
                    else:
                        shutil.copy2(src, dst)
            print(f"✅ {local_path} 恢复成功")
        except Exception as e:
            # Best effort: report why this folder was skipped and carry on.
            print(f"ℹ️ {repo_path} 恢复跳过: {e}")
        finally:
            # Always drop the scratch copy, including after a failed download.
            shutil.rmtree(temp_dir, ignore_errors=True)


def download_all():
    """Restore the Nanobot and Hermes directories from the dataset repository.

    Does nothing when ``DATASET_ID`` or ``HF_TOKEN`` is not configured.
    Failures are printed per folder and never raised to the caller.
    """
    if not DATASET_ID or not HF_TOKEN:
        return
    print("📥 正在恢复Nanobot数据...")
    # The original iterated an undefined SYNC_CONFIG here, which raised
    # NameError before anything was restored; the Nanobot mapping is the
    # counterpart of what upload_all() backs up.
    _restore_mapping(NANOBOT_CONFIG)

    print("📥 正在恢复Hermes数据...")
    _restore_mapping(HERMES_CONFIG)
77
+
78
def _backup_forever(interval_seconds=600):
    """Run upload_all() every *interval_seconds* seconds, forever.

    The first backup happens after one full interval.  Unexpected errors are
    printed and the loop continues, so a single failure cannot silently end
    the periodic backups while the main thread keeps the process alive.
    """
    while True:
        time.sleep(interval_seconds)
        try:
            upload_all()
        except Exception as e:
            print(f"❌ 备份翻车: {e}")


if __name__ == "__main__":
    # Daemon thread: it must not block interpreter shutdown.
    threading.Thread(target=_backup_forever, daemon=True).start()
    # Keep the main thread alive so the daemon thread keeps running.
    while True:
        time.sleep(1)