File size: 8,351 Bytes
42eda82
 
 
 
 
 
f1bddc9
42eda82
 
 
 
 
 
 
f1bddc9
42eda82
f1bddc9
42eda82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1bddc9
42eda82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f1bddc9
42eda82
 
 
f1bddc9
 
 
 
 
42eda82
f1bddc9
 
42eda82
 
f1bddc9
 
42eda82
 
f1bddc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42eda82
f1bddc9
 
42eda82
 
 
 
 
 
f1bddc9
42eda82
 
 
f1bddc9
 
 
42eda82
f1bddc9
 
 
42eda82
f1bddc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42eda82
f1bddc9
 
42eda82
 
 
 
 
 
 
 
f1bddc9
 
 
 
 
 
 
 
42eda82
f1bddc9
 
 
 
42eda82
 
f1bddc9
42eda82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#!/usr/bin/env python3
"""
OpenClaw 配置同步脚本
功能:从 Hugging Face Dataset 拉取配置,并定时推送变更回 Dataset
作者:根据用户需求生成
日期:2026-02-08
更新:修复 huggingface_hub 导入问题
"""

import os
import json
import time
import logging
import hashlib
import shutil
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download, upload_file, list_repo_files

# Configure logging: INFO level, "timestamp - level - message" lines.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

class OpenClawConfigSync:
    """Sync OpenClaw config files between a local directory and a Hugging Face Dataset.

    Settings come from the ``HF_TOKEN`` and ``HF_DATASET`` environment
    variables. Only top-level files of the local config directory whose
    extension is listed in ``CONFIG_SUFFIXES`` are treated as configuration.

    The instance remembers the MD5 digest of every file it downloaded or
    uploaded, so ``config_changed`` can tell whether an upload is needed.
    """

    # Extensions treated as configuration files by download, upload and
    # change detection alike (kept in one place so the three cannot drift).
    CONFIG_SUFFIXES = ('.json', '.yaml', '.yml')

    def __init__(self):
        """Read settings from the environment and create the Hub client.

        Raises:
            ValueError: if ``HF_TOKEN`` or ``HF_DATASET`` is unset or empty.
        """
        self.hf_token = os.getenv('HF_TOKEN', '')
        self.dataset_repo = os.getenv('HF_DATASET', '')
        self.local_config_dir = Path('/root/.openclaw')
        self.sync_interval = 300  # seconds between periodic uploads (5 minutes)
        # file name -> MD5 digest of the content last known to match the Dataset
        self._synced_hashes = {}

        if not self.hf_token or not self.dataset_repo:
            logger.error('HF_TOKEN 或 HF_DATASET 环境变量未设置')
            raise ValueError('缺少必要的环境变量')

        self.api = HfApi(token=self.hf_token)
        self.repo_dir = Path('/tmp/openclaw_dataset')

    def _synced_map(self):
        """Return the name -> digest map, creating it if ``__init__`` was bypassed."""
        return self.__dict__.setdefault('_synced_hashes', {})

    def _local_config_files(self):
        """Return the regular files in the config directory that have a config extension."""
        return sorted(
            path for path in self.local_config_dir.glob('*')
            if path.suffix in self.CONFIG_SUFFIXES and path.is_file()
        )

    def _remember_synced(self, file_path, digest=None):
        """Record that ``file_path`` matches the Dataset (hashing it if no digest is given)."""
        if digest is None:
            digest = self.calculate_file_hash(file_path)
        if digest is not None:
            self._synced_map()[Path(file_path).name] = digest

    def calculate_file_hash(self, file_path):
        """Return the hex MD5 digest of a file, used only to detect content changes.

        Args:
            file_path: path of the file to hash.

        Returns:
            The hex digest string, or ``None`` if the file could not be read
            (the error is logged, not raised).
        """
        hash_md5 = hashlib.md5()
        try:
            with open(file_path, "rb") as f:
                # Read in fixed-size chunks so large files are not loaded at once.
                for chunk in iter(lambda: f.read(4096), b""):
                    hash_md5.update(chunk)
            return hash_md5.hexdigest()
        except Exception as e:
            logger.error(f"计算文件哈希失败 {file_path}: {e}")
            return None

    def ensure_local_dir(self):
        """Create the local config directory (and parents) if it does not exist."""
        self.local_config_dir.mkdir(parents=True, exist_ok=True)

    def download_from_dataset(self):
        """Pull every config file from the Dataset into the local config directory.

        Repository files with a config extension are downloaded into
        ``repo_dir`` and copied to the local config directory under their base
        name (files in repository sub-folders are therefore flattened). An
        existing local file is first copied to ``<name>.bak<unix-time>``.

        Returns:
            ``True`` if at least one file was restored, ``False`` otherwise
            (including on any error, which is logged).
        """
        try:
            logger.info(f'从Dataset拉取配置: {self.dataset_repo}')

            # Both the staging directory and the destination must exist.
            self.repo_dir.mkdir(parents=True, exist_ok=True)
            self.ensure_local_dir()

            files = list_repo_files(
                repo_id=self.dataset_repo,
                repo_type="dataset",
                token=self.hf_token
            )

            if not files:
                logger.warning('Dataset中未找到配置文件')
                return False

            downloaded_count = 0
            for file_name in files:
                if not file_name.endswith(self.CONFIG_SUFFIXES):
                    continue
                try:
                    local_path = hf_hub_download(
                        repo_id=self.dataset_repo,
                        filename=file_name,
                        repo_type="dataset",
                        token=self.hf_token,
                        local_dir=self.repo_dir
                    )

                    config_file = Path(local_path)
                    dest_file = self.local_config_dir / config_file.name

                    if dest_file.exists():
                        # Append to the full name so a.json and a.yaml get
                        # distinct backups, and copy (not move) so the live
                        # file survives if the restore below fails.
                        backup_file = dest_file.with_name(
                            f'{dest_file.name}.bak{int(time.time())}'
                        )
                        shutil.copy2(dest_file, backup_file)
                        logger.debug(f'已备份原文件: {backup_file.name}')

                    shutil.copy2(config_file, dest_file)
                    # The local copy now equals the Dataset copy.
                    self._remember_synced(dest_file)
                    logger.info(f'已恢复配置: {config_file.name}')
                    downloaded_count += 1

                except Exception as e:
                    # One bad file must not abort the remaining downloads.
                    logger.error(f'下载文件 {file_name} 失败: {e}')
                    continue

            logger.info(f'配置文件下载完成,共下载 {downloaded_count} 个文件')
            return downloaded_count > 0

        except Exception as e:
            logger.error(f'从Dataset拉取配置失败: {e}')
            return False

    def upload_to_dataset(self):
        """Push every local config file to the Dataset.

        Each file is uploaded in its own commit under its base name. The
        digest of every successfully uploaded file is remembered for
        ``config_changed``.

        Returns:
            ``True`` if at least one file was uploaded, ``False`` otherwise
            (including on any error, which is logged).
        """
        try:
            logger.info('推送配置变更到Dataset')

            config_files = self._local_config_files()

            if not config_files:
                logger.warning('没有配置文件需要上传')
                return False

            uploaded_count = 0
            for config_file in config_files:
                try:
                    # Hash before uploading: an edit made while the upload is
                    # in flight then still counts as a change next time.
                    digest = self.calculate_file_hash(config_file)
                    upload_file(
                        path_or_fileobj=str(config_file),
                        path_in_repo=config_file.name,
                        repo_id=self.dataset_repo,
                        repo_type="dataset",
                        token=self.hf_token,
                        commit_message=f"自动同步配置: {config_file.name} - {time.strftime('%Y-%m-%d %H:%M:%S')}"
                    )
                    self._remember_synced(config_file, digest)
                    logger.info(f'已上传配置: {config_file.name}')
                    uploaded_count += 1

                except Exception as e:
                    # One bad file must not abort the remaining uploads.
                    logger.error(f'上传文件 {config_file.name} 失败: {e}')
                    continue

            logger.info(f'配置文件上传完成,共上传 {uploaded_count} 个文件')
            return uploaded_count > 0

        except Exception as e:
            logger.error(f'推送配置到Dataset失败: {e}')
            return False

    def config_changed(self):
        """Report whether any local config file differs from what was last synced.

        A file counts as changed when it has never been downloaded/uploaded by
        this instance, when its MD5 digest differs from the remembered one, or
        when it cannot be hashed.

        Returns:
            ``False`` if there are no config files or all are unchanged,
            ``True`` otherwise. If the check itself fails the error is logged
            and ``True`` is returned so that an upload is still attempted.
        """
        try:
            config_files = self._local_config_files()
            if not config_files:
                return False

            synced = self._synced_map()
            for config_file in config_files:
                digest = self.calculate_file_hash(config_file)
                if digest is None or synced.get(config_file.name) != digest:
                    return True
            return False

        except Exception as e:
            logger.error(f'检查配置变更失败: {e}')
            return True

    def run_sync(self, mode='download'):
        """Run one sync pass.

        Args:
            mode: ``'download'`` to pull from the Dataset, ``'upload'`` to push
                local changes (skipped when nothing changed).

        Returns:
            The result of the download/upload, ``True`` when an upload was
            skipped because nothing changed, ``False`` for an unknown mode.
        """
        self.ensure_local_dir()

        if mode == 'download':
            return self.download_from_dataset()
        elif mode == 'upload':
            if self.config_changed():
                return self.upload_to_dataset()
            else:
                logger.info('配置无变更,跳过上传')
                return True
        return False

    def start_periodic_sync(self):
        """Loop forever: sleep ``sync_interval`` seconds, then run an upload pass.

        This method never returns. Any exception raised by a pass is logged
        and followed by a 60 second pause before the loop continues.
        """
        logger.info('启动定时同步服务')
        while True:
            try:
                time.sleep(self.sync_interval)
                self.run_sync('upload')
            except Exception as e:
                logger.error(f'定时同步失败: {e}')
                time.sleep(60)  # back off for a minute after a failure

def main():
    """Command-line entry point.

    Expects exactly one argument: ``download``, ``upload`` or ``sync``.
    Anything else prints a usage line and exits with status 1. ``download``
    and ``upload`` run a single pass; ``sync`` runs a download pass and then
    blocks on the periodic upload loop. Any exception is logged and turns
    into exit status 1.
    """
    import sys

    valid_modes = ('download', 'upload', 'sync')
    cli_args = sys.argv[1:]
    if not (len(cli_args) == 1 and cli_args[0] in valid_modes):
        print('用法: python sync.py [download|upload|sync]')
        sys.exit(1)

    action = cli_args[0]

    try:
        syncer = OpenClawConfigSync()

        if action in ('download', 'upload'):
            # One-shot modes map directly onto run_sync's mode names.
            syncer.run_sync(action)
        else:
            # 'sync': restore once, then keep uploading from a worker thread.
            import threading
            syncer.run_sync('download')
            worker = threading.Thread(
                target=syncer.start_periodic_sync,
                daemon=True,
            )
            worker.start()
            worker.join()

    except Exception as err:
        logger.error(f'同步服务失败: {err}')
        sys.exit(1)

# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()