xjf666 committed on
Commit
60e533b
·
verified ·
1 Parent(s): c3269aa

Create sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +124 -0
sync_data.sh ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/sh
#
# sync_data.sh - start-up wrapper that adds HuggingFace dataset backups
# around `npm run start`.
#
# Steps:
#   1. Write a small Python helper (hf_sync.py) that uploads / downloads
#      `easynode_backup_*.tar.gz` archives to / from a HuggingFace dataset.
#   2. Restore the newest archive into /easynode/app/ before the app starts.
#   3. Run a background loop that archives /easynode/app/db and uploads it
#      every SYNC_INTERVAL seconds.
#   4. Replace this shell with `npm run start`.
#
# Environment:
#   HF_TOKEN       HuggingFace access token   (required for backups)
#   DATASET_ID     dataset repository id      (required for backups)
#   SYNC_INTERVAL  seconds between backups    (default: 7200)

APP_DIR=/easynode/app
SYNC_SCRIPT="${APP_DIR}/hf_sync.py"
SYNC_INTERVAL=${SYNC_INTERVAL:-7200}

# Check environment variables. Without credentials the backup machinery
# cannot work, but the application itself can: start it directly, as the
# message promises, instead of exiting and leaving nothing running.
if [ -z "$HF_TOKEN" ] || [ -z "$DATASET_ID" ]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec npm run start
fi

# Activate the virtual environment so that `python` below resolves to it.
. /easynode/venv/bin/activate

# Generate the upload/download helper. The quoted delimiter ('EOL') keeps the
# shell from expanding anything inside the Python source.
cat > "${SYNC_SCRIPT}" << 'EOL'
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile


def manage_backups(api, repo_id, max_files=50):
    """Delete the oldest backup archives once max_files or more exist.

    Archives are ordered by name; the timestamp embedded in the file name
    makes that ordering chronological.
    """
    files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
    backup_files = [f for f in files if f.startswith('easynode_backup_') and f.endswith('.tar.gz')]
    backup_files.sort()

    if len(backup_files) >= max_files:
        files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
        for file_to_delete in files_to_delete:
            try:
                api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type="dataset")
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                # Best effort: a failed deletion must not abort the rotation.
                print(f'Error deleting {file_to_delete}: {str(e)}')


def upload_backup(file_path, file_name, token, repo_id):
    """Upload file_path to the dataset as file_name, then rotate old backups."""
    api = HfApi(token=token)
    try:
        api.upload_file(
            path_or_fileobj=file_path,
            path_in_repo=file_name,
            repo_id=repo_id,
            repo_type="dataset"
        )
        print(f"Successfully uploaded {file_name}")

        manage_backups(api, repo_id)
    except Exception as e:
        print(f"Error uploading file: {str(e)}")


def download_latest_backup(token, repo_id):
    """Download the newest backup archive and unpack it into /easynode/app/."""
    try:
        api = HfApi(token=token)
        files = api.list_repo_files(repo_id=repo_id, repo_type="dataset")
        backup_files = [f for f in files if f.startswith('easynode_backup_') and f.endswith('.tar.gz')]

        if not backup_files:
            print("No backup files found")
            return

        latest_backup = sorted(backup_files)[-1]

        with tempfile.TemporaryDirectory() as temp_dir:
            filepath = api.hf_hub_download(
                repo_id=repo_id,
                filename=latest_backup,
                repo_type="dataset",
                local_dir=temp_dir
            )

            if filepath and os.path.exists(filepath):
                # NOTE(review): extractall() trusts the member paths stored in
                # the archive. The archive comes from the configured dataset;
                # consider an extraction filter if that repo is not fully
                # trusted.
                with tarfile.open(filepath, 'r:gz') as tar:
                    tar.extractall('/easynode/app/')
                print(f"Successfully restored backup from {latest_backup}")

    except Exception as e:
        print(f"Error downloading backup: {str(e)}")


if __name__ == "__main__":
    action = sys.argv[1]
    token = sys.argv[2]
    repo_id = sys.argv[3]

    if action == "upload":
        file_path = sys.argv[4]
        file_name = sys.argv[5]
        upload_backup(file_path, file_name, token, repo_id)
    elif action == "download":
        download_latest_backup(token, repo_id)
EOL

# Restore the latest backup on first start. The helper is called by absolute
# path so this works regardless of the current working directory.
# NOTE(review): the token is passed on the command line, as before; it is
# therefore visible to anything that can list this container's processes.
echo "Downloading latest backup from HuggingFace..."
python "${SYNC_SCRIPT}" download "${HF_TOKEN}" "${DATASET_ID}"

#######################################
# Periodically archive the db/ directory and upload it. Never returns.
# Globals:   APP_DIR, SYNC_SCRIPT, HF_TOKEN, DATASET_ID, SYNC_INTERVAL (read)
# Outputs:   progress messages on stdout, failures on stderr
#######################################
sync_data() {
  while true; do
    echo "Starting sync process at $(date)"

    if [ -d "${APP_DIR}/db" ]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="easynode_backup_${timestamp}.tar.gz"
      backup_path="/tmp/${backup_file}"

      # -C keeps the archive members relative ("db/...") without depending
      # on the current working directory.
      tar -czf "${backup_path}" -C "${APP_DIR}" db/

      # Verify the archive is readable before uploading: a truncated archive
      # (e.g. disk full) would otherwise become the newest backup and be
      # restored on the next start. The exit status of the tar above is not
      # used for this, because tar may report a non-zero status for files
      # that merely changed while being read.
      if tar -tzf "${backup_path}" > /dev/null 2>&1; then
        echo "Uploading backup to HuggingFace..."
        python "${SYNC_SCRIPT}" upload "${HF_TOKEN}" "${DATASET_ID}" "${backup_path}" "${backup_file}"
      else
        echo "Backup archive ${backup_path} is missing or corrupt, skipping upload" >&2
      fi

      rm -f "${backup_path}"
    else
      echo "Data directory does not exist yet, waiting for next sync..."
    fi

    echo "Next sync in ${SYNC_INTERVAL} seconds..."
    sleep "${SYNC_INTERVAL}"
  done
}

# Start the sync loop in the background.
sync_data &

# Start the main application; exec replaces this shell with it.
exec npm run start