kevin committed on
Commit
fc46a84
·
1 Parent(s): b65b6dc
Files changed (5) hide show
  1. .gitignore +1 -0
  2. Dockerfile +30 -0
  3. HOW-TO-USE.md +4 -0
  4. README.md +1 -0
  5. sync_data.sh +91 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .idea
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM lanol/filecodebox:beta
2
+ # Install pip and git, then remove the apt package lists
3
+ RUN apt-get update && apt-get install -y \
4
+ python3-pip \
5
+ git \
6
+ && rm -rf /var/lib/apt/lists/*
7
+ # huggingface_hub is imported by the inline Python in sync_data.sh
8
+ RUN pip3 install --no-cache-dir huggingface_hub datasets
9
+ # Create a non-root user with UID 1000
10
+ RUN useradd -m -u 1000 user
11
+
12
+ WORKDIR /app
13
+
14
+ ENV HOME=/home/user \
15
+ PATH=/home/user/.local/bin:$PATH \
16
+ HF_HOME=/app/data/hf_cache \
17
+ PYTHONUNBUFFERED=1
18
+ # Data directory, owned by the non-root user
19
+ RUN mkdir -p /app/data && \
20
+ chown -R user:user /app/data
21
+ # Copy the entrypoint script and make it executable
22
+ COPY sync_data.sh /app/
23
+ RUN chmod +x /app/sync_data.sh && \
24
+ chown user:user /app/sync_data.sh
25
+ # Run as the non-root user from here on
26
+ USER user
27
+ # Same port as app_port in README.md
28
+ EXPOSE 12345
29
+
30
+ ENTRYPOINT ["/app/sync_data.sh"]
HOW-TO-USE.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ 添加环境变量
2
+ HF_TOKEN #你的token
3
+ DATASET_ID # 用户名/数据集名称
4
+ SYNC_INTERVAL # 同步间隔(秒),未设置时默认为 7200
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: green
5
  colorTo: gray
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
5
  colorTo: gray
6
  sdk: docker
7
  pinned: false
8
+ app_port: 12345
9
  ---
10
 
11
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
sync_data.sh ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Check environment variables: if HF_TOKEN or DATASET_ID is empty, skip backup/restore and run only the app
4
+ if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
5
+ echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
6
+ exec python main.py
7
+ fi
8
+
9
+ # Log in to HuggingFace (via an environment variable, to avoid interactive prompts)
10
+ export HUGGING_FACE_HUB_TOKEN=$HF_TOKEN
11
+
12
+ # Sync function: loops forever; each cycle archives data/ and uploads the archive to the DATASET_ID dataset repo
13
+ sync_data() {
14
+ while true; do
15
+ echo "Starting sync process at $(date)"
16
+
17
+ # Create a temporary compressed archive of data/ under /tmp, named with the current timestamp
18
+ cd /app
19
+ timestamp=$(date +%Y%m%d_%H%M%S)
20
+ backup_file="backup_${timestamp}.tar.gz"
21
+
22
+ tar -czf "/tmp/${backup_file}" data/
23
+
24
+ # Upload to HuggingFace with an inline Python script; any exception is printed, not raised
25
+ python3 -c "
26
+ try:
27
+ from huggingface_hub import HfApi
28
+ import os
29
+
30
+ api = HfApi()
31
+ api.upload_file(
32
+ path_or_fileobj='/tmp/${backup_file}',
33
+ path_in_repo='${backup_file}',
34
+ repo_id='${DATASET_ID}',
35
+ repo_type='dataset'
36
+ )
37
+ print('Backup completed successfully')
38
+ except Exception as e:
39
+ print(f'Backup failed: {str(e)}')
40
+ "
41
+ # Clean up the temporary file
42
+ rm -f "/tmp/${backup_file}"
43
+
44
+ # Set the sync interval (defaults to 7200 seconds when SYNC_INTERVAL is unset or empty)
45
+ SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
46
+ echo "Next sync in ${SYNC_INTERVAL} seconds..."
47
+ sleep $SYNC_INTERVAL
48
+ done
49
+ }
50
+
51
+ # Restore function: downloads the backup_*.tar.gz file that sorts last in the dataset repo and extracts it into /app; any exception is printed, not raised
52
+ restore_latest() {
53
+ echo "Attempting to restore latest backup..."
54
+ python3 -c "
55
+ try:
56
+ from huggingface_hub import HfApi
57
+ import os
58
+
59
+ api = HfApi()
60
+ files = api.list_repo_files('${DATASET_ID}', repo_type='dataset')
61
+ backup_files = [f for f in files if f.startswith('backup_') and f.endswith('.tar.gz')]
62
+
63
+ if backup_files:
64
+ latest = sorted(backup_files)[-1]
65
+ api.hf_hub_download(
66
+ repo_id='${DATASET_ID}',
67
+ filename=latest,
68
+ repo_type='dataset',
69
+ local_dir='/tmp'
70
+ )
71
+ os.system(f'tar -xzf /tmp/{latest} -C /app')
72
+ os.remove(f'/tmp/{latest}')
73
+ print(f'Restored from {latest}')
74
+ else:
75
+ print('No backup found')
76
+ except Exception as e:
77
+ print(f'Restore failed: {str(e)}')
78
+ "
79
+ }
80
+
81
+ # Main program: runs in a subshell whose stdout and stderr are piped through tee, appending to /app/data/backup.log
82
+ (
83
+ # Attempt a restore
84
+ restore_latest
85
+
86
+ # Start the sync process in the background
87
+ sync_data &
88
+
89
+ # Start the main application
90
+ exec python main.py
91
+ ) 2>&1 | tee -a /app/data/backup.log