Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| 文件同步脚本:将 /data/workspace 的文件同步到 Hugging Face Dataset | |
| 用于持久化存储 ClawDBot 生成的文件 | |
| 使用方法: | |
| 1. 创建一个 Dataset: huggingface-cli repo create clawdbot-data --type dataset | |
| 2. 设置环境变量: HF_DATASET_REPO=acpr123/clawdbot-data | |
| 3. 在 entrypoint.sh 中添加定期运行此脚本的 cron job | |
| """ | |
| import os | |
| import time | |
| from pathlib import Path | |
| from huggingface_hub import HfApi, CommitOperationAdd | |
def sync_workspace_to_hf() -> None:
    """Upload every file under /data/workspace to a Hugging Face dataset repo.

    Configuration is read from the environment:

    * ``HF_TOKEN`` (or, as a fallback, ``HUGGINGFACE_API_KEY``): access token.
      When neither is set the sync is skipped.
    * ``HF_DATASET_REPO``: target dataset repo id; defaults to
      ``acpr123/clawdbot-data``.

    All regular files found recursively under the workspace are pushed in a
    single commit, keeping their paths relative to the workspace root. The
    function is best-effort: skips and upload failures are reported on stdout
    and the function returns ``None`` instead of raising on an upload error.
    """
    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY")
    dataset_repo = os.environ.get("HF_DATASET_REPO", "acpr123/clawdbot-data")
    workspace_dir = Path("/data/workspace")

    if not hf_token:
        print("⚠️ 未配置 HF_TOKEN,跳过文件同步")
        return

    if not workspace_dir.exists():
        print(f"⚠️ Workspace 目录不存在: {workspace_dir}")
        return

    # Build one "add" operation per regular file; directories are skipped.
    operations = [
        CommitOperationAdd(
            # Repo paths are "/"-separated. as_posix() guarantees that even
            # on platforms whose native separator is a backslash, where
            # str(path) would produce a single oddly named top-level file.
            path_in_repo=file_path.relative_to(workspace_dir).as_posix(),
            path_or_fileobj=str(file_path),
        )
        for file_path in workspace_dir.rglob("*")
        if file_path.is_file()
    ]

    if not operations:
        print("📁 Workspace 为空,无需同步")
        return

    # Top-level boundary of a periodically run job: any upload failure is
    # reported and swallowed so that one bad run does not abort the caller.
    try:
        api = HfApi()
        api.create_commit(
            repo_id=dataset_repo,
            repo_type="dataset",
            operations=operations,
            commit_message=f"Sync workspace files at {time.strftime('%Y-%m-%d %H:%M:%S')}",
            token=hf_token,
        )
        print(f"✅ 已同步 {len(operations)} 个文件到 {dataset_repo}")
    except Exception as e:
        print(f"❌ 同步失败: {e}")
# Script entry point: announce the run on stdout, then perform one sync pass.
# Nothing happens when the module is merely imported.
if __name__ == "__main__":
    print("🔄 开始同步文件到 Hugging Face Dataset...")
    sync_workspace_to_hf()