acpr123 committed on
Commit
546d015
·
verified ·
1 Parent(s): bde7dc5

Upload sync_files.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. sync_files.py +62 -0
sync_files.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ 文件同步脚本:将 /data/workspace 的文件同步到 Hugging Face Dataset
4
+ 用于持久化存储 ClawDBot 生成的文件
5
+
6
+ 使用方法:
7
+ 1. 创建一个 Dataset: huggingface-cli repo create clawdbot-data --type dataset
8
+ 2. 设置环境变量: HF_DATASET_REPO=acpr123/clawdbot-data
9
+ 3. 在 entrypoint.sh 中添加定期运行此脚本的 cron job
10
+ """
11
+ import os
12
+ import time
13
+ from pathlib import Path
14
+ from huggingface_hub import HfApi, CommitOperationAdd
15
+
16
def sync_workspace_to_hf(workspace_dir="/data/workspace") -> None:
    """Upload every file under the workspace directory to a Hugging Face dataset.

    Configuration is read from the environment:

    * ``HF_TOKEN`` (or, if unset/empty, ``HUGGINGFACE_API_KEY``) -- access
      token passed to the Hub API. When neither is set the sync is skipped.
    * ``HF_DATASET_REPO`` -- target dataset repo id; defaults to
      ``"acpr123/clawdbot-data"``.

    All regular files found recursively under ``workspace_dir`` are added in
    a single commit, each stored at its path relative to ``workspace_dir``.

    Args:
        workspace_dir: Directory to upload (str or path-like). Defaults to
            ``/data/workspace``.

    Returns:
        None. Progress and failures are reported on stdout; an upload error
        is caught and printed rather than raised.
    """
    hf_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_API_KEY")
    dataset_repo = os.environ.get("HF_DATASET_REPO", "acpr123/clawdbot-data")
    workspace_dir = Path(workspace_dir)

    if not hf_token:
        print("⚠️ 未配置 HF_TOKEN,跳过文件同步")
        return

    if not workspace_dir.exists():
        print(f"⚠️ Workspace 目录不存在: {workspace_dir}")
        return

    # Collect one "add" operation per regular file. Repo paths are always
    # forward-slash separated, so use as_posix() rather than str(), which
    # would yield backslashes on Windows.
    operations = [
        CommitOperationAdd(
            path_in_repo=file_path.relative_to(workspace_dir).as_posix(),
            path_or_fileobj=str(file_path),
        )
        for file_path in workspace_dir.rglob("*")
        if file_path.is_file()
    ]

    if not operations:
        print("📁 Workspace 为空,无需同步")
        return

    try:
        api = HfApi()
        api.create_commit(
            repo_id=dataset_repo,
            repo_type="dataset",
            operations=operations,
            commit_message=f"Sync workspace files at {time.strftime('%Y-%m-%d %H:%M:%S')}",
            token=hf_token,
        )
        print(f"✅ 已同步 {len(operations)} 个文件到 {dataset_repo}")
    except Exception as e:
        # Deliberately best-effort: a failed upload is reported, not raised,
        # so the calling script still exits normally.
        print(f"❌ 同步失败: {e}")
59
+
60
if __name__ == "__main__":
    # Script entry point: announce the run, then perform one sync pass.
    print("🔄 开始同步文件到 Hugging Face Dataset...")
    sync_workspace_to_hf()