a8926764 committed on
Commit
f1bddc9
·
verified ·
1 Parent(s): 9c1cc61

Update sync.py

Browse files
Files changed (1) hide show
  1. sync.py +281 -51
sync.py CHANGED
@@ -1,9 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
  OpenClaw 配置同步脚本
4
  功能:从 Hugging Face Dataset 拉取配置,并定时推送变更回 Dataset
5
  作者:根据用户需求生成
6
  日期:2026-02-08
 
7
  """
8
 
9
  import os
@@ -11,8 +206,9 @@ import json
11
  import time
12
  import logging
13
  import hashlib
 
14
  from pathlib import Path
15
- from huggingface_hub import HfApi, Repository, snapshot_download
16
 
17
  # 配置日志
18
  logging.basicConfig(
@@ -36,7 +232,7 @@ class OpenClawConfigSync:
36
  self.repo_dir = Path('/tmp/openclaw_dataset')
37
 
38
  def calculate_file_hash(self, file_path):
39
- """计算文件MD5哈希值用于比较变更[1,2](@ref)"""
40
  hash_md5 = hashlib.md5()
41
  try:
42
  with open(file_path, "rb") as f:
@@ -52,70 +248,97 @@ class OpenClawConfigSync:
52
  self.local_config_dir.mkdir(parents=True, exist_ok=True)
53
 
54
  def download_from_dataset(self):
55
- """从Dataset拉取最新配置[8](@ref)"""
56
  try:
57
  logger.info(f'从Dataset拉取配置: {self.dataset_repo}')
58
 
59
- # 下载Dataset内容
60
- snapshot_dir = snapshot_download(
 
 
 
61
  repo_id=self.dataset_repo,
62
- token=self.hf_token,
63
- local_dir=self.repo_dir,
64
- allow_patterns=['*.json', '*.yaml', '*.yml'],
65
- force_download=True
66
  )
67
 
68
- config_src = Path(snapshot_dir)
69
- if not config_src.exists():
70
- logger.warning('Dataset中未找到配置文件,将使用默认配置')
71
  return False
72
 
73
- # 复制配置文件到本地目录
74
- for config_file in config_src.glob('*'):
75
- if config_file.is_file():
76
- dest_file = self.local_config_dir / config_file.name
77
- # 备份原文件
78
- if dest_file.exists():
79
- backup_file = dest_file.with_suffix(f'.bak{int(time.time())}')
80
- dest_file.rename(backup_file)
81
-
82
- import shutil
83
- shutil.copy2(config_file, dest_file)
84
- logger.info(f'已恢复配置: {config_file.name}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- return True
 
87
 
88
  except Exception as e:
89
  logger.error(f'从Dataset拉取配置失败: {e}')
90
  return False
91
 
92
  def upload_to_dataset(self):
93
- """推送配置变更回Dataset[8](@ref)"""
94
  try:
95
  logger.info('推送配置变更到Dataset')
96
 
97
- # 克隆Dataset仓库
98
- repo = Repository(
99
- local_dir=self.repo_dir,
100
- repo_id=self.dataset_repo,
101
- token=self.hf_token,
102
- clone_from=self.dataset_repo
103
- )
104
 
105
- # 复制更新的配置文件
106
- for config_file in self.local_config_dir.glob('*'):
107
- if config_file.suffix in ['.json', '.yaml', '.yml']:
108
- dest_file = self.repo_dir / config_file.name
109
- import shutil
110
- shutil.copy2(config_file, dest_file)
111
 
112
- # 提交并推送变更
113
- repo.git_add(auto_lfs_track=True)
114
- repo.git_commit(f"自动同步配置 {time.strftime('%Y-%m-%d %H:%M:%S')}")
115
- repo.git_push()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
- logger.info('配置功推送到Dataset')
118
- return True
119
 
120
  except Exception as e:
121
  logger.error(f'推送配置到Dataset失败: {e}')
@@ -124,15 +347,22 @@ class OpenClawConfigSync:
124
  def config_changed(self):
125
  """检查配置是否有变更"""
126
  try:
127
- for config_file in self.local_config_dir.glob('*.json'):
128
- current_hash = self.calculate_file_hash(config_file)
129
- # 这里可以比较当前哈希与上次保存的哈希值
130
- # 简化处理:总是返回True,确保定期备份
 
 
 
 
131
  return True
 
 
 
 
132
  except Exception as e:
133
  logger.error(f'检查配置变更失败: {e}')
134
-
135
- return True
136
 
137
  def run_sync(self, mode='download'):
138
  """运行同步流程"""
 
1
+ # #!/usr/bin/env python3
2
+ # """
3
+ # OpenClaw 配置同步脚本
4
+ # 功能:从 Hugging Face Dataset 拉取配置,并定时推送变更回 Dataset
5
+ # 作者:根据用户需求生成
6
+ # 日期:2026-02-08
7
+ # """
8
+
9
+ # import os
10
+ # import json
11
+ # import time
12
+ # import logging
13
+ # import hashlib
14
+ # from pathlib import Path
15
+ # from huggingface_hub import HfApi, Repository, snapshot_download
16
+
17
+ # # 配置日志
18
+ # logging.basicConfig(
19
+ # level=logging.INFO,
20
+ # format='%(asctime)s - %(levelname)s - %(message)s'
21
+ # )
22
+ # logger = logging.getLogger(__name__)
23
+
24
+ # class OpenClawConfigSync:
25
+ # def __init__(self):
26
+ # self.hf_token = os.getenv('HF_TOKEN', '')
27
+ # self.dataset_repo = os.getenv('HF_DATASET', '')
28
+ # self.local_config_dir = Path('/root/.openclaw')
29
+ # self.sync_interval = 300 # 5分钟同步一次
30
+
31
+ # if not self.hf_token or not self.dataset_repo:
32
+ # logger.error('HF_TOKEN 或 HF_DATASET 环境变量未设置')
33
+ # raise ValueError('缺少必要的环境变量')
34
+
35
+ # self.api = HfApi(token=self.hf_token)
36
+ # self.repo_dir = Path('/tmp/openclaw_dataset')
37
+
38
+ # def calculate_file_hash(self, file_path):
39
+ # """计算文件MD5哈希值用于比较变更[1,2](@ref)"""
40
+ # hash_md5 = hashlib.md5()
41
+ # try:
42
+ # with open(file_path, "rb") as f:
43
+ # for chunk in iter(lambda: f.read(4096), b""):
44
+ # hash_md5.update(chunk)
45
+ # return hash_md5.hexdigest()
46
+ # except Exception as e:
47
+ # logger.error(f"计算文件哈希失败 {file_path}: {e}")
48
+ # return None
49
+
50
+ # def ensure_local_dir(self):
51
+ # """确保本地配置目录存在"""
52
+ # self.local_config_dir.mkdir(parents=True, exist_ok=True)
53
+
54
+ # def download_from_dataset(self):
55
+ # """从Dataset拉取最新配置[8](@ref)"""
56
+ # try:
57
+ # logger.info(f'从Dataset拉取配置: {self.dataset_repo}')
58
+
59
+ # # 下载Dataset内容
60
+ # snapshot_dir = snapshot_download(
61
+ # repo_id=self.dataset_repo,
62
+ # token=self.hf_token,
63
+ # local_dir=self.repo_dir,
64
+ # allow_patterns=['*.json', '*.yaml', '*.yml'],
65
+ # force_download=True
66
+ # )
67
+
68
+ # config_src = Path(snapshot_dir)
69
+ # if not config_src.exists():
70
+ # logger.warning('Dataset中未找到配置文件,将使用默认配置')
71
+ # return False
72
+
73
+ # # 复制配置文件到本地目录
74
+ # for config_file in config_src.glob('*'):
75
+ # if config_file.is_file():
76
+ # dest_file = self.local_config_dir / config_file.name
77
+ # # 备份原文件
78
+ # if dest_file.exists():
79
+ # backup_file = dest_file.with_suffix(f'.bak{int(time.time())}')
80
+ # dest_file.rename(backup_file)
81
+
82
+ # import shutil
83
+ # shutil.copy2(config_file, dest_file)
84
+ # logger.info(f'已恢复配置: {config_file.name}')
85
+
86
+ # return True
87
+
88
+ # except Exception as e:
89
+ # logger.error(f'从Dataset拉取配置失败: {e}')
90
+ # return False
91
+
92
+ # def upload_to_dataset(self):
93
+ # """推送配置变更回Dataset[8](@ref)"""
94
+ # try:
95
+ # logger.info('推送配置变更到Dataset')
96
+
97
+ # # 克隆Dataset仓库
98
+ # repo = Repository(
99
+ # local_dir=self.repo_dir,
100
+ # repo_id=self.dataset_repo,
101
+ # token=self.hf_token,
102
+ # clone_from=self.dataset_repo
103
+ # )
104
+
105
+ # # 复制更新的配置文件
106
+ # for config_file in self.local_config_dir.glob('*'):
107
+ # if config_file.suffix in ['.json', '.yaml', '.yml']:
108
+ # dest_file = self.repo_dir / config_file.name
109
+ # import shutil
110
+ # shutil.copy2(config_file, dest_file)
111
+
112
+ # # 提交并推送变更
113
+ # repo.git_add(auto_lfs_track=True)
114
+ # repo.git_commit(f"自动同步配置 {time.strftime('%Y-%m-%d %H:%M:%S')}")
115
+ # repo.git_push()
116
+
117
+ # logger.info('配置已成功推送到Dataset')
118
+ # return True
119
+
120
+ # except Exception as e:
121
+ # logger.error(f'推送配置到Dataset失败: {e}')
122
+ # return False
123
+
124
+ # def config_changed(self):
125
+ # """检查配置是否有变更"""
126
+ # try:
127
+ # for config_file in self.local_config_dir.glob('*.json'):
128
+ # current_hash = self.calculate_file_hash(config_file)
129
+ # # 这里可以比较当前哈希与上次保存的哈希值
130
+ # # 简化处理:总是返回True,确保定期备份
131
+ # return True
132
+ # except Exception as e:
133
+ # logger.error(f'检查配置变更失败: {e}')
134
+
135
+ # return True
136
+
137
+ # def run_sync(self, mode='download'):
138
+ # """运行同步流程"""
139
+ # self.ensure_local_dir()
140
+
141
+ # if mode == 'download':
142
+ # return self.download_from_dataset()
143
+ # elif mode == 'upload':
144
+ # if self.config_changed():
145
+ # return self.upload_to_dataset()
146
+ # else:
147
+ # logger.info('配置无变更,跳过上传')
148
+ # return True
149
+ # return False
150
+
151
+ # def start_periodic_sync(self):
152
+ # """启动定时同步服务"""
153
+ # logger.info('启动定时同步服务')
154
+ # while True:
155
+ # try:
156
+ # time.sleep(self.sync_interval)
157
+ # self.run_sync('upload')
158
+ # except Exception as e:
159
+ # logger.error(f'定时同步失败: {e}')
160
+ # time.sleep(60) # 出错后等待1分钟再重试
161
+
162
+ # def main():
163
+ # """主函数"""
164
+ # import sys
165
+
166
+ # if len(sys.argv) != 2 or sys.argv[1] not in ['download', 'upload', 'sync']:
167
+ # print('用法: python sync.py [download|upload|sync]')
168
+ # sys.exit(1)
169
+
170
+ # mode = sys.argv[1]
171
+
172
+ # try:
173
+ # sync = OpenClawConfigSync()
174
+
175
+ # if mode == 'download':
176
+ # sync.run_sync('download')
177
+ # elif mode == 'upload':
178
+ # sync.run_sync('upload')
179
+ # elif mode == 'sync':
180
+ # # 后台运行定时同步
181
+ # import threading
182
+ # sync.run_sync('download')
183
+ # sync_thread = threading.Thread(target=sync.start_periodic_sync)
184
+ # sync_thread.daemon = True
185
+ # sync_thread.start()
186
+ # sync_thread.join()
187
+
188
+ # except Exception as e:
189
+ # logger.error(f'同步服务失败: {e}')
190
+ # sys.exit(1)
191
+
192
+ # if __name__ == '__main__':
193
+ # main()
194
+
195
  #!/usr/bin/env python3
196
  """
197
  OpenClaw 配置同步脚本
198
  功能:从 Hugging Face Dataset 拉取配置,并定时推送变更回 Dataset
199
  作者:根据用户需求生成
200
  日期:2026-02-08
201
+ 更新:修复 huggingface_hub 导入问题
202
  """
203
 
204
  import os
 
206
  import time
207
  import logging
208
  import hashlib
209
+ import shutil
210
  from pathlib import Path
211
+ from huggingface_hub import HfApi, hf_hub_download, upload_file, list_repo_files
212
 
213
  # 配置日志
214
  logging.basicConfig(
 
232
  self.repo_dir = Path('/tmp/openclaw_dataset')
233
 
234
  def calculate_file_hash(self, file_path):
235
+ """计算文件MD5哈希值用于比较变更"""
236
  hash_md5 = hashlib.md5()
237
  try:
238
  with open(file_path, "rb") as f:
 
248
  self.local_config_dir.mkdir(parents=True, exist_ok=True)
249
 
250
  def download_from_dataset(self):
251
+ """从Dataset拉取最新配置"""
252
  try:
253
  logger.info(f'从Dataset拉取配置: {self.dataset_repo}')
254
 
255
+ # 确保临时目录存在
256
+ self.repo_dir.mkdir(parents=True, exist_ok=True)
257
+
258
+ # 获取仓库中的文件列表
259
+ files = list_repo_files(
260
  repo_id=self.dataset_repo,
261
+ repo_type="dataset",
262
+ token=self.hf_token
 
 
263
  )
264
 
265
+ if not files:
266
+ logger.warning('Dataset中未找到配置文件')
 
267
  return False
268
 
269
+ # 下载所有配置文件
270
+ downloaded_count = 0
271
+ for file_name in files:
272
+ if file_name.endswith(('.json', '.yaml', '.yml')):
273
+ try:
274
+ # 下载文件
275
+ local_path = hf_hub_download(
276
+ repo_id=self.dataset_repo,
277
+ filename=file_name,
278
+ repo_type="dataset",
279
+ token=self.hf_token,
280
+ local_dir=self.repo_dir
281
+ )
282
+
283
+ # 复制到配置目录
284
+ config_file = Path(local_path)
285
+ dest_file = self.local_config_dir / config_file.name
286
+
287
+ # 备份原文件
288
+ if dest_file.exists():
289
+ backup_file = dest_file.with_suffix(f'.bak{int(time.time())}')
290
+ dest_file.rename(backup_file)
291
+ logger.debug(f'已备份原文件: {backup_file.name}')
292
+
293
+ shutil.copy2(config_file, dest_file)
294
+ logger.info(f'已恢复配置: {config_file.name}')
295
+ downloaded_count += 1
296
+
297
+ except Exception as e:
298
+ logger.error(f'下载文件 {file_name} 失败: {e}')
299
+ continue
300
 
301
+ logger.info(f'配置文件下载完成,共下载 {downloaded_count} 个文件')
302
+ return downloaded_count > 0
303
 
304
  except Exception as e:
305
  logger.error(f'从Dataset拉取配置失败: {e}')
306
  return False
307
 
308
  def upload_to_dataset(self):
309
+ """推送配置变更回Dataset"""
310
  try:
311
  logger.info('推送配置变更到Dataset')
312
 
313
+ # 获取本地配置文件
314
+ config_files = list(self.local_config_dir.glob('*'))
315
+ config_files = [f for f in config_files if f.suffix in ['.json', '.yaml', '.yml']]
 
 
 
 
316
 
317
+ if not config_files:
318
+ logger.warning('没有配置文件需要上传')
319
+ return False
 
 
 
320
 
321
+ uploaded_count = 0
322
+ for config_file in config_files:
323
+ try:
324
+ # 上传文件到Dataset
325
+ upload_file(
326
+ path_or_fileobj=str(config_file),
327
+ path_in_repo=config_file.name,
328
+ repo_id=self.dataset_repo,
329
+ repo_type="dataset",
330
+ token=self.hf_token,
331
+ commit_message=f"自动同步配置: {config_file.name} - {time.strftime('%Y-%m-%d %H:%M:%S')}"
332
+ )
333
+ logger.info(f'已上传配置: {config_file.name}')
334
+ uploaded_count += 1
335
+
336
+ except Exception as e:
337
+ logger.error(f'上传文件 {config_file.name} 失败: {e}')
338
+ continue
339
 
340
+ logger.info(f'配置文件上传完,共上传 {uploaded_count} 个文件')
341
+ return uploaded_count > 0
342
 
343
  except Exception as e:
344
  logger.error(f'推送配置到Dataset失败: {e}')
 
347
  def config_changed(self):
348
  """检查配置是否有变更"""
349
  try:
350
+ # 检查是否有配置文件
351
+ config_files = list(self.local_config_dir.glob('*.json'))
352
+ if not config_files:
353
+ return False
354
+
355
+ # 检查是否有 .bak 备份文件(表示有变更)
356
+ backup_files = list(self.local_config_dir.glob('*.bak*'))
357
+ if backup_files:
358
  return True
359
+
360
+ # 或者可以检查文件修改时间等
361
+ return True # 简化处理,总是返回True
362
+
363
  except Exception as e:
364
  logger.error(f'检查配置变更失败: {e}')
365
+ return True
 
366
 
367
  def run_sync(self, mode='download'):
368
  """运行同步流程"""