Gendle committed on
Commit
c6e8a6c
·
verified ·
1 Parent(s): d3a7b72

Upload src/data_sync.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/data_sync.py +446 -0
src/data_sync.py ADDED
@@ -0,0 +1,446 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hermes Agent Data Sync Service
4
+ Handles data persistence to/from Hugging Face Dataset
5
+ """
6
+
7
import argparse
import json
import os
import shutil
import stat
import sys
import tarfile
import threading
import time
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, List

from huggingface_hub import HfApi, hf_hub_download, upload_folder
from loguru import logger
20
+
21
+ # 文件监控(可选)
22
# File watching is optional: without watchdog the daemon only does scheduled syncs.
try:
    from watchdog.observers import Observer
    from watchdog.events import FileSystemEventHandler
    WATCHDOG_AVAILABLE = True
except ImportError:
    # Bind placeholders so the module still imports without watchdog.
    # ConfigFileHandler subclasses FileSystemEventHandler at class-definition
    # time; leaving the name unbound would raise NameError on import and
    # defeat the scheduled-sync-only fallback.
    Observer = None
    FileSystemEventHandler = object
    WATCHDOG_AVAILABLE = False
    logger.warning("watchdog not installed, file change detection disabled")
29
+
30
+
31
class DatasetManager:
    """Back up Hermes Agent state to a Hugging Face dataset repo and restore it.

    A backup stages selected files and directories in ``self.temp_dir`` and
    uploads that folder with ``HfApi.upload_folder``. A restore downloads a
    snapshot of the dataset and copies the entries back to their original
    locations.

    NOTE(review): the backup includes ``.env`` and ``auth.json``. The dataset
    repository is presumably private -- confirm before pointing this at a
    public one.
    """

    # Single source of truth for the backup layout, shared by backup and
    # restore: (path_mapping key, path relative to the backup root, is_directory).
    _BACKUP_LAYOUT = (
        ('config', 'config/config.yaml', False),
        ('env', 'config/.env', False),
        ('auth', 'config/auth.json', False),
        ('soul', 'personality/SOUL.md', False),
        ('memories', 'memories', True),
        ('skills', 'skills', True),
        ('sessions', 'sessions', True),
        ('state_db', 'state/state.db', False),
        ('logs', 'logs', True),
        ('cron', 'cron', True),
        ('image_cache', 'image_cache', True),
        ('baoyu_skills', 'baoyu_skills', True),
        ('webui_token', 'webui/.token', False),
    )

    # Every instance stages into the same fixed temp path, and the daemon can
    # trigger uploads from both the file-watcher thread and the scheduled
    # loop. The lock is class-level so one run cannot wipe the staging
    # directory while another run is still uploading it.
    _upload_lock = threading.Lock()

    def __init__(self, dataset_repo: Optional[str] = None, token: Optional[str] = None):
        """Resolve repository, token and paths from arguments or the environment.

        Args:
            dataset_repo: Dataset repo id; falls back to ``HF_DATASET_REPO``.
            token: Hub token; falls back to ``HF_TOKEN`` and then
                ``HUGGING_FACE_HUB_TOKEN``.
        """
        self.dataset_repo = dataset_repo or os.environ.get('HF_DATASET_REPO')
        self.token = token or os.environ.get('HF_TOKEN') or os.environ.get('HUGGING_FACE_HUB_TOKEN')
        self.api = HfApi(token=self.token)
        self.hermes_home = Path(os.environ.get('HERMES_HOME', '/data/.hermes'))
        self.temp_dir = Path('/tmp/hermes_sync')

        # Logical name -> live location of each item that is backed up.
        self.path_mapping = {
            'config': self.hermes_home / 'config.yaml',
            'env': self.hermes_home / '.env',
            'auth': self.hermes_home / 'auth.json',
            'soul': self.hermes_home / 'SOUL.md',
            'memories': self.hermes_home / 'memories',
            'skills': self.hermes_home / 'skills',
            'sessions': self.hermes_home / 'sessions',
            'state_db': self.hermes_home / 'state.db',
            'logs': self.hermes_home / 'logs',
            'cron': self.hermes_home / 'cron',
            'webui_token': Path('/data/.hermes-web-ui') / '.token',
            'image_cache': self.hermes_home / 'image_cache',
            'baoyu_skills': Path('/home/appuser/.baoyu-skills'),
        }

    def validate(self) -> bool:
        """Check the configuration.

        Returns:
            False when no dataset repo is configured, True otherwise. A
            missing token only produces a warning.
        """
        if not self.dataset_repo:
            logger.error("HF_DATASET_REPO not set")
            return False

        if not self.token:
            logger.warning("HF_TOKEN not set, will try public dataset")

        return True

    @staticmethod
    def _make_tree_readable(root: Path) -> None:
        """Reset modes below *root* to 755 (directories) and 644 (files).

        Copied sources may be read-only (the original notes mention scripts
        with mode 555). Failures are logged and otherwise ignored because
        this step is best-effort.
        """
        dir_mode = stat.S_IRWXU | stat.S_IRGRP | stat.S_IXGRP | stat.S_IROTH | stat.S_IXOTH
        file_mode = stat.S_IRUSR | stat.S_IWUSR | stat.S_IRGRP | stat.S_IROTH
        for current, dirnames, filenames in os.walk(root):
            for names, mode in ((dirnames, dir_mode), (filenames, file_mode)):
                for name in names:
                    target = os.path.join(current, name)
                    try:
                        os.chmod(target, mode)
                    except OSError as exc:
                        logger.warning(f"Could not fix permissions on {target}: {exc}")

    def prepare_backup_data(self) -> Path:
        """Stage everything to back up under ``self.temp_dir``.

        The staging directory is recreated from scratch on every call, so
        callers must not run this concurrently (``upload_to_dataset`` takes
        the class lock for that reason).

        Returns:
            The staging directory.

        Raises:
            Exception: any copy or write error is logged and re-raised.
        """
        logger.info("Preparing backup data...")

        # Start from an empty staging directory. It is created owner-only
        # because it holds copies of .env and auth.json inside shared /tmp.
        if self.temp_dir.exists():
            shutil.rmtree(self.temp_dir)
        self.temp_dir.mkdir(parents=True, mode=stat.S_IRWXU)

        # Create the fixed directory skeleton even for sources that do not exist.
        for _key, rel_path, is_dir in self._BACKUP_LAYOUT:
            staged = self.temp_dir / rel_path
            (staged if is_dir else staged.parent).mkdir(parents=True, exist_ok=True)

        try:
            for key, rel_path, is_dir in self._BACKUP_LAYOUT:
                source = self.path_mapping[key]
                if not source.exists():
                    continue
                staged = self.temp_dir / rel_path
                if is_dir:
                    shutil.copytree(source, staged, dirs_exist_ok=True)
                else:
                    shutil.copy2(source, staged)

            # Copies keep the source modes; normalise them for the upload step.
            logger.info("Fixing permissions in temp backup dir...")
            self._make_tree_readable(self.temp_dir)

            # Record when and from where this backup was taken.
            metadata = {
                'timestamp': datetime.now().isoformat(),
                'version': '0.10.0',
                'hermes_home': str(self.hermes_home)
            }
            with open(self.temp_dir / 'metadata.json', 'w', encoding='utf-8') as f:
                json.dump(metadata, f, indent=2)

            logger.success(f"Backup prepared at {self.temp_dir}")
            return self.temp_dir

        except Exception as e:
            logger.error(f"Failed to prepare backup: {e}")
            raise

    def upload_to_dataset(self, force: bool = False) -> bool:
        """Stage a backup and upload it to the dataset repository.

        Args:
            force: Accepted for CLI compatibility; currently unused because
                every call uploads unconditionally.

        Returns:
            True on success, False if staging or uploading failed (the error
            is logged, never raised).
        """
        try:
            # Serialise staging + upload: both use the same temp directory.
            with self._upload_lock:
                backup_dir = self.prepare_backup_data()

                logger.info(f"Uploading to dataset: {self.dataset_repo}")

                self.api.upload_folder(
                    folder_path=str(backup_dir),
                    repo_id=self.dataset_repo,
                    repo_type="dataset",
                    commit_message=f"Hermes Agent backup - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
                )

            logger.success("Backup uploaded successfully")
            return True

        except Exception as e:
            logger.error(f"Failed to upload to dataset: {e}")
            return False

    def download_from_dataset(self) -> bool:
        """Download a snapshot of the dataset and restore it.

        Returns:
            True when the download and the restore pass completed, False if
            an exception occurred (the error is logged, never raised).
        """
        try:
            logger.info(f"Downloading from dataset: {self.dataset_repo}")

            # Fresh, owner-only download directory: the snapshot contains
            # .env and auth.json and lives in shared /tmp.
            download_dir = Path('/tmp/hermes_download')
            if download_dir.exists():
                shutil.rmtree(download_dir)
            download_dir.mkdir(parents=True, mode=stat.S_IRWXU)

            self.api.snapshot_download(
                repo_id=self.dataset_repo,
                repo_type="dataset",
                local_dir=str(download_dir)
            )

            logger.success("Download completed")

            self.restore_from_download(download_dir)
            return True

        except Exception as e:
            logger.error(f"Failed to download from dataset: {e}")
            return False

    def restore_from_download(self, download_dir: Path):
        """Copy a downloaded backup back into the live locations.

        Controlled by ``SKIP_CONFIG_RESTORE`` (default ``true``):

        * true: ``config/config.yaml`` and ``baoyu_skills`` are not restored
          in place. Per the original author's notes, entrypoint.sh
          regenerates them and an old config.yaml would overwrite the
          current model configuration. The backed-up config.yaml is instead
          saved as ``config.yaml.restored`` in the Hermes home so that
          entrypoint.sh can merge user-edited sections.
        * false: every backed-up entry, including config.yaml, is restored.

        ``skills`` is always restored; per the original notes entrypoint.sh
        reinstalls only part of it. Each entry is restored independently: a
        failure is logged and the remaining entries are still processed.

        Args:
            download_dir: Root of the downloaded snapshot.
        """
        logger.info("Restoring data to Hermes home...")

        self.hermes_home.mkdir(parents=True, exist_ok=True)

        skip_restore = os.environ.get('SKIP_CONFIG_RESTORE', 'true').lower() in ('true', '1', 'yes')

        if skip_restore:
            skip_paths = {
                'config/config.yaml',  # model/provider config is regenerated by entrypoint.sh
                'baoyu_skills',        # baoyu-skills EXTEND.md is regenerated by entrypoint.sh
            }
            # Sorted so the log line is stable (set order is arbitrary).
            logger.info(f"SKIP_CONFIG_RESTORE=true, skipping: {', '.join(sorted(skip_paths))}")
        else:
            skip_paths = set()
            logger.info("SKIP_CONFIG_RESTORE=false, restoring all backed-up configurations")

        # Derived from the backup layout so backup and restore cannot drift
        # apart; config.yaml is handled separately below.
        restore_mapping = {
            rel_path: self.path_mapping[key]
            for key, rel_path, _is_dir in self._BACKUP_LAYOUT
            if key != 'config'
        }

        if not skip_restore:
            restore_mapping['config/config.yaml'] = self.path_mapping['config']
        else:
            # Keep the old config next to the live one for a later merge
            # instead of overwriting the regenerated config.yaml.
            restored_path = self.hermes_home / 'config.yaml.restored'
            src = download_dir / 'config' / 'config.yaml'
            if src.exists():
                try:
                    shutil.copy2(src, restored_path)
                    logger.info("Restored config.yaml to config.yaml.restored for merge")
                except OSError as e:
                    # Best-effort like every other entry: do not abort the
                    # whole restore because the merge copy failed.
                    logger.error(f"Failed to restore config/config.yaml: {e}")

        for src_rel, dst in restore_mapping.items():
            if src_rel in skip_paths:
                logger.info(f"Skipping restore of {src_rel} (will be regenerated by entrypoint.sh)")
                continue

            src = download_dir / src_rel
            if src.exists():
                try:
                    if src.is_file():
                        dst.parent.mkdir(parents=True, exist_ok=True)
                        shutil.copy2(src, dst)
                        logger.info(f"Restored: {src_rel}")
                    elif src.is_dir():
                        # Replace the live directory wholesale with the backup.
                        if dst.exists():
                            shutil.rmtree(dst)
                        shutil.copytree(src, dst)
                        logger.info(f"Restored directory: {src_rel}")
                except Exception as e:
                    logger.error(f"Failed to restore {src_rel}: {e}")
            else:
                logger.warning(f"Not found in backup: {src_rel}")

        logger.success("Data restoration completed")
315
+
316
+
317
class ConfigFileHandler(FileSystemEventHandler):
    """React to config-file modifications by triggering an immediate backup.

    Events are ignored during a startup grace period and rate-limited by a
    cooldown, so bursts of writes do not cause a burst of uploads.
    """

    # Seconds after construction during which changes are not backed up, to
    # avoid redundant uploads while the application is still starting.
    STARTUP_GRACE_PERIOD = 30

    # Exact file names (not suffixes) whose modification triggers a backup.
    WATCHED_FILES = frozenset({'config.yaml', '.env', 'auth.json'})

    def __init__(self, manager: DatasetManager):
        """Remember the manager used for uploads and start the grace-period clock."""
        super().__init__()
        self.manager = manager
        self.last_backup_time = 0
        self.backup_cooldown = 5  # seconds between two triggered backups
        self.start_time = time.time()
        self._startup_logged = False

    def on_modified(self, event):
        """Handle a modification event reported by the file watcher.

        Directories, events inside the startup grace period, unwatched file
        names and events inside the cooldown window are ignored.
        """
        if event.is_directory:
            return

        # Startup grace period: log once, then stay quiet until it is over.
        elapsed = time.time() - self.start_time
        if elapsed < self.STARTUP_GRACE_PERIOD:
            if not self._startup_logged:
                logger.info(f"In startup grace period ({int(self.STARTUP_GRACE_PERIOD - elapsed)}s remaining), skipping backup for: {event.src_path}")
                self._startup_logged = True
            return

        # Compare the exact base name: a suffix test would also match files
        # such as "prod.env" or "old-config.yaml". fsdecode accepts the
        # str or bytes paths an event may carry.
        if os.path.basename(os.fsdecode(event.src_path)) not in self.WATCHED_FILES:
            return

        current_time = time.time()
        if current_time - self.last_backup_time <= self.backup_cooldown:
            return

        logger.info(f"Config file changed: {event.src_path}")
        logger.info("Triggering immediate backup...")
        # Start the cooldown before the attempt so a failing upload is not
        # retried on every subsequent write event.
        self.last_backup_time = current_time
        try:
            uploaded = self.manager.upload_to_dataset()
        except Exception as e:
            logger.error(f"Immediate backup failed: {e}")
            return

        # upload_to_dataset reports failure through its return value rather
        # than by raising, so success must not be logged unconditionally.
        if not uploaded:
            logger.error("Immediate backup failed: upload_to_dataset reported failure")
            return

        logger.success("Immediate backup completed")
        self._trigger_reload()

    def _trigger_reload(self):
        """Tell the operator how to apply the saved configuration.

        Per the original notes Hermes has no config-reload command, so this
        only logs that a Space restart is needed.
        """
        logger.info("Configuration saved. Please restart Space to apply changes immediately.")
366
+
367
+
368
+ def run_daemon():
369
+ """后台守护进程模式 - 定期同步 + 实时文件监听"""
370
+ logger.info("Starting data sync daemon...")
371
+
372
+ sync_interval = int(os.environ.get('SYNC_INTERVAL', '60')) # 默认60秒(实时模式)
373
+ manager = DatasetManager()
374
+
375
+ if not manager.validate():
376
+ logger.error("Configuration invalid, exiting")
377
+ sys.exit(1)
378
+
379
+ logger.info(f"Sync interval: {sync_interval} seconds")
380
+
381
+ # 如果 watchdog 可用,启动文件监听
382
+ observer = None
383
+ if WATCHDOG_AVAILABLE:
384
+ try:
385
+ logger.info("Starting file watcher for real-time sync...")
386
+ event_handler = ConfigFileHandler(manager)
387
+ observer = Observer()
388
+ observer.schedule(event_handler, str(manager.hermes_home), recursive=False)
389
+ observer.start()
390
+ logger.success("File watcher started - config changes will trigger immediate backup")
391
+ except Exception as e:
392
+ logger.error(f"Failed to start file watcher: {e}")
393
+ logger.warning("Falling back to scheduled sync only")
394
+ observer = None
395
+ else:
396
+ logger.warning("Watchdog not available, using scheduled sync only")
397
+
398
+ try:
399
+ while True:
400
+ try:
401
+ time.sleep(sync_interval)
402
+ logger.info("Performing scheduled backup...")
403
+ manager.upload_to_dataset()
404
+ except KeyboardInterrupt:
405
+ logger.info("Daemon stopped")
406
+ break
407
+ except Exception as e:
408
+ logger.error(f"Sync error: {e}")
409
+ finally:
410
+ # 清理文件监听器
411
+ if observer:
412
+ logger.info("Stopping file watcher...")
413
+ observer.stop()
414
+ observer.join()
415
+ logger.info("File watcher stopped")
416
+
417
+
418
def main():
    """Command-line entry point: run a backup, a restore, or the daemon.

    Exits with status 1 when the configuration is invalid. For ``backup``
    and ``restore`` the exit status reflects whether the operation
    succeeded; ``daemon`` hands control to ``run_daemon``.
    """
    cli = argparse.ArgumentParser(description='Hermes Agent Data Sync')
    cli.add_argument(
        'action',
        choices=['backup', 'restore', 'daemon'],
        help='Action to perform',
    )
    cli.add_argument(
        '--force', '-f',
        action='store_true',
        help='Force backup even if no changes',
    )
    options = cli.parse_args()

    sync_manager = DatasetManager()
    if not sync_manager.validate():
        logger.error("Configuration invalid")
        sys.exit(1)

    if options.action == 'daemon':
        run_daemon()
        return

    if options.action == 'backup':
        succeeded = sync_manager.upload_to_dataset(force=options.force)
    else:
        # argparse restricts `action` to the three choices, so this is 'restore'.
        succeeded = sync_manager.download_from_dataset()
    sys.exit(0 if succeeded else 1)


if __name__ == '__main__':
    main()