wangyikunkun committed on
Commit
c6a47d0
·
verified ·
1 Parent(s): ee8773d

Create sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +122 -0
sync_data.sh ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/sh
# sync_data.sh — restore the newest backup of /app/electerm-web/data from a
# HuggingFace dataset repo on startup, then periodically archive and upload
# the directory while the app runs.
# Required env: HF_TOKEN (HuggingFace API token), DATASET_ID (dataset repo id).
# Optional env: SYNC_INTERVAL (seconds between backups, default 7200).

# Without credentials, skip all backup functionality and just run the app.
if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  # exec replaces this shell with node; nothing after it ever runs.
  # (Removed an unreachable 'exit 0' that sat after exec — had exec somehow
  # failed, it would have masked the failure with a success status.)
  exec node ./src/app/app.js
fi
9
+
10
+ # 激活虚拟环境
11
+ . /opt/venv/bin/activate
12
+
# Upload one backup archive to the HuggingFace dataset repo, then prune old
# backups so the repo does not grow without bound.
# Arguments: $1 - local path of the archive; $2 - filename to use in the repo.
# Values reach Python via the environment (quoted heredoc, no shell
# interpolation into Python source), so quotes or backslashes in the token or
# paths can neither break the script nor inject code.
upload_backup() {
  file_path="$1"
  file_name="$2"

  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" \
  BACKUP_FILE_PATH="$file_path" BACKUP_FILE_NAME="$file_name" \
  python3 - <<'PYEOF'
import os
from huggingface_hub import HfApi

def manage_backups(api, repo_id, max_files=50):
    """Delete the oldest backups so fewer than max_files remain."""
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [f for f in files
                    if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]
    backup_files.sort()  # timestamped names sort chronologically

    if len(backup_files) >= max_files:
        for file_to_delete in backup_files[:(len(backup_files) - max_files + 1)]:
            try:
                api.delete_file(path_in_repo=file_to_delete,
                                repo_id=repo_id, repo_type='dataset')
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                print(f'Error deleting {file_to_delete}: {str(e)}')

file_path = os.environ['BACKUP_FILE_PATH']
file_name = os.environ['BACKUP_FILE_NAME']
dataset_id = os.environ['DATASET_ID']
api = HfApi(token=os.environ['HF_TOKEN'])
try:
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=file_name,
        repo_id=dataset_id,
        repo_type='dataset'
    )
    print(f'Successfully uploaded {file_name}')

    manage_backups(api, dataset_id)
except Exception as e:
    print(f'Error uploading file: {str(e)}')
PYEOF
}

# Restore the newest backup archive from the dataset repo into the data dir.
# Prints a notice and returns normally when no backup exists yet.
# Credentials reach Python via the environment (quoted heredoc) instead of
# being interpolated into the Python source, avoiding quote-breakage/injection.
download_latest_backup() {
  HF_TOKEN="$HF_TOKEN" DATASET_ID="$DATASET_ID" python3 - <<'PYEOF'
import os
import sys
import tarfile
import tempfile
from huggingface_hub import HfApi

dataset_id = os.environ['DATASET_ID']
api = HfApi(token=os.environ['HF_TOKEN'])
try:
    files = api.list_repo_files(repo_id=dataset_id, repo_type='dataset')
    backup_files = [f for f in files
                    if f.startswith('electerm_backup_') and f.endswith('.tar.gz')]

    if not backup_files:
        print('No backup files found')
        sys.exit()

    # Timestamped names sort chronologically, so the last one is the newest.
    latest_backup = sorted(backup_files)[-1]

    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id=dataset_id,
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )

        if filepath and os.path.exists(filepath):
            with tarfile.open(filepath, 'r:gz') as tar:
                # Use the 'data' extraction filter when available (Python 3.12+)
                # to reject path-traversal entries in the archive; fall back to
                # the legacy behavior on older interpreters.
                if hasattr(tarfile, 'data_filter'):
                    tar.extractall('/app/electerm-web/data', filter='data')
                else:
                    tar.extractall('/app/electerm-web/data')
            print(f'Successfully restored backup from {latest_backup}')

except Exception as e:
    print(f'Error downloading backup: {str(e)}')
PYEOF
}
87
+
88
+ # 首次启动时下载最新备份
89
+ echo "Downloading latest backup from HuggingFace..."
90
+ download_latest_backup
91
+
# Periodic backup loop: archive the data directory and upload it, then sleep
# SYNC_INTERVAL seconds (default 7200). Runs forever; start it in the
# background.
sync_data() {
  while true; do
    echo "Starting sync process at $(date)"

    if [ -d /app/electerm-web/data ]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="electerm_backup_${timestamp}.tar.gz"

      # Compress the data directory. Only upload when tar succeeds, so a
      # failed/partial archive is never pushed as a "backup".
      if tar -czf "/tmp/${backup_file}" -C /app/electerm-web/data .; then
        echo "Uploading backup to HuggingFace..."
        upload_backup "/tmp/${backup_file}" "${backup_file}"
      else
        echo "tar failed; skipping upload of ${backup_file}" >&2
      fi

      rm -f "/tmp/${backup_file}"
    else
      echo "Data directory does not exist yet, waiting for next sync..."
    fi

    SYNC_INTERVAL=${SYNC_INTERVAL:-7200}
    echo "Next sync in ${SYNC_INTERVAL} seconds..."
    sleep "$SYNC_INTERVAL"
  done
}
117
+
118
+ # 后台启动同步进程
119
+ sync_data &
120
+
121
+ # 启动 Electerm
122
+ exec node ./src/app/app.js