#!/usr/bin/env python3
"""
OpenClaw memory sync script.

Syncs local memory files to a Hugging Face Dataset, and restores
them from the Dataset.
"""
import os
import sys
import time
import logging
from pathlib import Path

from huggingface_hub import HfApi, hf_hub_download, upload_file, list_repo_files

# Configure logging
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class MemorySync:
    """Bidirectional sync of local ``*.md`` memory files with an HF Dataset."""

    def __init__(self):
        # HF access token; may be None for fully public datasets.
        self.hf_token = os.getenv('HF_TOKEN')
        # Replace 'your-username/openclaw-memory' below with the Dataset
        # you actually created.
        self.hf_dataset = os.getenv('HF_MEMORY_DATASET',
                                    'a8926764/openclaw-memory')
        # Must match the OPENCLAW_MEMORY_DIR set by the launch script.
        self.local_memory_dir = Path(
            os.getenv('OPENCLAW_MEMORY_DIR', '/root/.openclaw/memory'))
        self.api = HfApi(token=self.hf_token)

    def ensure_memory_dir(self):
        """Make sure the local memory directory exists."""
        self.local_memory_dir.mkdir(parents=True, exist_ok=True)
        logger.info(f"Memory directory ensured: {self.local_memory_dir}")

    def download_memory(self):
        """Pull all memory files (``*.md``) from the Dataset.

        Returns:
            bool: always True — a missing or empty Dataset on first run
            is not treated as an error, so the startup flow can continue.
        """
        self.ensure_memory_dir()
        try:
            logger.info(f'开始从 Dataset 下载记忆文件: {self.hf_dataset}')
            # BUGFIX: pass the token here as well — without it, listing a
            # private dataset fails even though the downloads below are
            # token-authenticated.
            files = list_repo_files(repo_id=self.hf_dataset,
                                    repo_type="dataset",
                                    token=self.hf_token)
            downloaded_count = 0
            for file_name in files:
                if not file_name.endswith('.md'):
                    continue
                try:
                    hf_hub_download(
                        repo_id=self.hf_dataset,
                        filename=file_name,
                        repo_type="dataset",
                        token=self.hf_token,
                        local_dir=self.local_memory_dir,
                    )
                    logger.info(f'已下载记忆文件: {file_name}')
                    downloaded_count += 1
                except Exception as e:
                    # Best-effort: one bad file must not stop the rest.
                    logger.error(f'下载文件 {file_name} 失败: {e}')
                    continue
            if downloaded_count == 0:
                logger.info('Dataset 中未找到记忆文件,将使用新记忆。')
            else:
                logger.info(f'记忆文件下载完成,共下载 {downloaded_count} 个文件。')
            return True
        except Exception as e:
            logger.error(f'从 Dataset 拉取记忆失败: {e}')
            # The Dataset may be empty on first run; that is not
            # necessarily an error, so return True to let the flow continue.
            return True

    def upload_memory(self):
        """Push every local ``*.md`` memory file to the Dataset.

        Returns:
            bool: True if at least one file was uploaded, False when
            there was nothing to upload or the push failed entirely.
        """
        self.ensure_memory_dir()
        try:
            logger.info(f'开始推送记忆文件到 Dataset: {self.hf_dataset}')
            memory_files = list(self.local_memory_dir.glob('*.md'))
            if not memory_files:
                logger.warning('本地没有找到 .md 格式的记忆文件,跳过上传。')
                return False
            uploaded_count = 0
            for mem_file in memory_files:
                try:
                    upload_file(
                        path_or_fileobj=str(mem_file),
                        path_in_repo=mem_file.name,
                        repo_id=self.hf_dataset,
                        repo_type="dataset",
                        token=self.hf_token,
                        commit_message=(
                            f"自动同步记忆: {mem_file.name} - "
                            f"{time.strftime('%Y-%m-%d %H:%M:%S')}"
                        ),
                    )
                    logger.info(f'已上传记忆文件: {mem_file.name}')
                    uploaded_count += 1
                except Exception as e:
                    # Best-effort: keep uploading the remaining files.
                    logger.error(f'上传文件 {mem_file.name} 失败: {e}')
                    continue
            logger.info(f'记忆文件上传完成,共上传 {uploaded_count} 个文件。')
            return uploaded_count > 0
        except Exception as e:
            logger.error(f'推送记忆到 Dataset 失败: {e}')
            return False


def main():
    """CLI entry point: dispatch on the single ``download``/``upload`` arg."""
    if len(sys.argv) != 2 or sys.argv[1] not in ['download', 'upload']:
        print('用法: python memory_sync.py [download|upload]')
        sys.exit(1)

    sync = MemorySync()
    mode = sys.argv[1]

    if mode == 'download':
        success = sync.download_memory()
        sys.exit(0 if success else 1)
    elif mode == 'upload':
        success = sync.upload_memory()
        sys.exit(0 if success else 1)


if __name__ == '__main__':
    main()