playingapi committed on
Commit
9c9a615
·
verified ·
1 Parent(s): c040788

Create sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +125 -0
sync_data.sh ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash

# Backups need both HF_TOKEN and DATASET_ID. When either one is empty or
# unset, say so and exit successfully so the rest of the script is skipped.
[[ -n "$HF_TOKEN" && -n "$DATASET_ID" ]] || {
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exit 0
}

# Activate the virtual environment (makes sure huggingface_hub is available).
. /opt/venv/bin/activate
11
+
12
#######################################
# Upload a backup archive to the Hugging Face dataset repo, then prune old
# backups from that repo.
# Globals:   HF_TOKEN (read), DATASET_ID (read)
# Arguments: $1 - local path of the archive to upload
#            $2 - file name to store the archive under in the repo
# Outputs:   progress and error messages on stdout
# Returns:   0 on success; non-zero when the upload or the pruning step
#            raised (failures to delete a single old backup are only logged)
#######################################
upload_backup() {
  local file_path="$1"
  local file_name="$2"
  local token="$HF_TOKEN"
  local repo_id="$DATASET_ID"

  # The values are handed to Python through argv and the environment rather
  # than being spliced into the program text, so quotes or backslashes in
  # them cannot break or alter the Python code. The token travels through
  # the environment to keep it out of the process argument list.
  HF_TOKEN="$token" python3 - "$file_path" "$file_name" "$repo_id" <<'PY'
import os
import sys

from huggingface_hub import HfApi


def manage_backups(api, repo_id, max_files=50):
    """Delete the oldest backups once the repo holds max_files or more.

    Backup names embed a sortable timestamp, so a lexicographic sort is
    oldest-first. After pruning, max_files - 1 backups remain.
    """
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [
        f for f in files
        if f.startswith('account_backup_') and f.endswith('.tar.gz')
    ]
    backup_files.sort()

    if len(backup_files) >= max_files:
        files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
        for file_to_delete in files_to_delete:
            try:
                api.delete_file(
                    path_in_repo=file_to_delete,
                    repo_id=repo_id,
                    repo_type='dataset',
                )
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                # Best effort: keep going with the remaining files.
                print(f'Error deleting {file_to_delete}: {str(e)}')


file_path, file_name, repo_id = sys.argv[1:4]
api = HfApi(token=os.environ['HF_TOKEN'])
try:
    api.upload_file(
        path_or_fileobj=file_path,
        path_in_repo=file_name,
        repo_id=repo_id,
        repo_type='dataset',
    )
    print(f'Successfully uploaded {file_name}')

    manage_backups(api, repo_id)
except Exception as e:
    print(f'Error uploading file: {str(e)}')
    # Report the failure to the shell instead of exiting 0.
    sys.exit(1)
PY
}
51
+
52
#######################################
# Download the most recent backup from the Hugging Face dataset repo and
# restore it into /app/account.
# Globals:   HF_TOKEN (read), DATASET_ID (read)
# Arguments: none
# Outputs:   progress and error messages on stdout
# Returns:   0 when a backup was restored or none exists; non-zero when
#            listing, downloading or extracting raised
#######################################
download_latest_backup() {
  local token="$HF_TOKEN"
  local repo_id="$DATASET_ID"

  # The values are handed to Python through argv and the environment rather
  # than being spliced into the program text, so quotes or backslashes in
  # them cannot break or alter the Python code. The token travels through
  # the environment to keep it out of the process argument list.
  HF_TOKEN="$token" python3 - "$repo_id" <<'PY'
import os
import sys
import tarfile
import tempfile

from huggingface_hub import HfApi

# Backups are created with `tar -C /app/account .`, i.e. member paths are
# relative to the account directory, so they must be unpacked into that same
# directory (not its parent) to end up where they came from.
RESTORE_DIR = '/app/account'

repo_id = sys.argv[1]
api = HfApi(token=os.environ['HF_TOKEN'])
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [
        f for f in files
        if f.startswith('account_backup_') and f.endswith('.tar.gz')
    ]

    if not backup_files:
        print('No backup files found')
        # SystemExit is not an Exception subclass, so this leaves with
        # status 0 without reaching the handler below.
        sys.exit()

    # Names embed a sortable timestamp, so the last one is the newest.
    latest_backup = sorted(backup_files)[-1]

    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(
            repo_id=repo_id,
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir,
        )

        if filepath and os.path.exists(filepath):
            # Make sure the target directory exists.
            os.makedirs(RESTORE_DIR, exist_ok=True)
            with tarfile.open(filepath, 'r:gz') as tar:
                tar.extractall(RESTORE_DIR)
            print(f'Successfully restored backup from {latest_backup}')

except Exception as e:
    print(f'Error downloading backup: {str(e)}')
    # Report the failure to the shell instead of exiting 0.
    sys.exit(1)
PY
}
93
+
94
# On first start, restore the most recent backup before syncing begins.
printf '%s\n' 'Downloading latest backup from HuggingFace...'
download_latest_backup
97
+
98
#######################################
# Sync loop: periodically archive /app/account and upload the archive.
# Runs forever; each round is followed by a sleep of SYNC_INTERVAL seconds.
# Globals:   SYNC_INTERVAL (read; set to 7200 when unset or empty)
# Arguments: none
# Outputs:   progress messages on stdout, skip reasons on stderr
# Returns:   never returns
#######################################
sync_data() {
  local timestamp backup_file tmp_dir archive tar_status

  # The interval cannot change while the loop runs, so default it once.
  : "${SYNC_INTERVAL:=7200}"

  while true; do
    echo "Starting sync process at $(date)"

    if [[ -d /app/account ]]; then
      timestamp=$(date +%Y%m%d_%H%M%S)
      backup_file="account_backup_${timestamp}.tar.gz"

      # Build the archive in a private directory instead of a predictable
      # path directly under /tmp.
      if tmp_dir=$(mktemp -d); then
        archive="${tmp_dir}/${backup_file}"

        # Compress the account directory.
        tar_status=0
        tar -czf "${archive}" -C /app/account . || tar_status=$?

        # GNU tar exits with 1 when a file changed while it was being read;
        # the archive is still usable then, so only a higher status or an
        # empty/missing archive stops the upload.
        if ((tar_status > 1)) || [[ ! -s "${archive}" ]]; then
          echo "Failed to create backup archive (tar exit ${tar_status}), skipping upload" >&2
        else
          echo "Uploading backup to HuggingFace..."
          upload_backup "${archive}" "${backup_file}"
        fi

        rm -rf -- "${tmp_dir:?}"
      else
        echo "Failed to create temporary directory, skipping this sync" >&2
      fi
    else
      echo "Account directory does not exist yet, waiting for next sync..."
    fi

    echo "Next sync in ${SYNC_INTERVAL} seconds..."
    sleep "${SYNC_INTERVAL}"
  done
}
123
+
124
+ # Start the sync loop (sync_data loops forever and does not return)
125
+ sync_data