flzta committed (verified)
Commit 41bd8ef · Parent(s): b668574

Create sync_data.sh

Files changed (1):
sync_data.sh +206 -0
sync_data.sh ADDED
@@ -0,0 +1,206 @@
#!/bin/bash

# Check the required environment variables; if they are missing,
# start Cloudreve without backup functionality.
if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
    echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
    exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
    exit 0
fi
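
# Example invocation (all values below are hypothetical placeholders):
#   HF_TOKEN=hf_xxxx DATASET_ID=your-user/cloudreve-backups \
#   ENCRYPTION_KEY='a-long-random-string' ./sync_data.sh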

# Encryption key (be sure to set this to a long, random string)
ENCRYPTION_KEY=${ENCRYPTION_KEY:-"set-your-encryption-key-here-a-long-random-string"}
BACKUP_DELAY=${BACKUP_DELAY:-7200}  # Default backup interval: 2 hours

# Activate the Python virtual environment
source /opt/venv/bin/activate

# Upload an encrypted backup to the HuggingFace dataset repo
upload_backup() {
    file_path="$1"
    file_name="$2"
    token="$HF_TOKEN"
    repo_id="$DATASET_ID"
    encryption_key="$ENCRYPTION_KEY"

    python3 -c "
from huggingface_hub import HfApi
import os
import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

def generate_key(password, salt=b'cloudreve_salt'):
    # Derive a Fernet key from the passphrase with PBKDF2-HMAC-SHA256
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    return base64.urlsafe_b64encode(kdf.derive(password.encode()))

def encrypt_file(file_path, key):
    f = Fernet(key)
    with open(file_path, 'rb') as file:
        file_data = file.read()
    encrypted_data = f.encrypt(file_data)
    encrypted_file_path = file_path + '.enc'
    with open(encrypted_file_path, 'wb') as file:
        file.write(encrypted_data)
    return encrypted_file_path

def manage_backups(api, repo_id, max_files=2):
    # Keep only the newest max_files backups; the timestamped names sort chronologically
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]
    backup_files.sort()

    if len(backup_files) > max_files:
        files_to_delete = backup_files[:(len(backup_files) - max_files)]
        for file_to_delete in files_to_delete:
            try:
                api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
                print(f'Deleted old backup: {file_to_delete}')
            except Exception as e:
                print(f'Error deleting {file_to_delete}: {str(e)}')

api = HfApi(token='$token')
try:
    # Derive the encryption key
    key = generate_key('$encryption_key')

    # Encrypt the archive
    encrypted_file_path = encrypt_file('$file_path', key)

    # Upload the encrypted file
    api.upload_file(
        path_or_fileobj=encrypted_file_path,
        path_in_repo='$file_name.enc',
        repo_id='$repo_id',
        repo_type='dataset'
    )
    print(f'Successfully uploaded encrypted $file_name')

    # Remove the temporary encrypted file
    os.remove(encrypted_file_path)

    # Prune old backups
    manage_backups(api, '$repo_id')
except Exception as e:
    print(f'Error uploading file: {str(e)}')
"
}
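
# Note: shell variables are interpolated directly into the embedded Python
# source above, so paths or keys containing single quotes would break it.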

# Download and restore the latest backup
download_latest_backup() {
    token="$HF_TOKEN"
    repo_id="$DATASET_ID"
    encryption_key="$ENCRYPTION_KEY"

    python3 -c "
from huggingface_hub import HfApi
import sys
import os
import tarfile
import tempfile
import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

def generate_key(password, salt=b'cloudreve_salt'):
    # Must match the derivation used when the backup was encrypted
    kdf = PBKDF2HMAC(
        algorithm=hashes.SHA256(),
        length=32,
        salt=salt,
        iterations=100000,
    )
    return base64.urlsafe_b64encode(kdf.derive(password.encode()))

def decrypt_file(encrypted_file_path, key):
    f = Fernet(key)
    with open(encrypted_file_path, 'rb') as file:
        encrypted_data = file.read()
    decrypted_data = f.decrypt(encrypted_data)
    decrypted_file_path = encrypted_file_path[:-4]  # strip the .enc suffix
    with open(decrypted_file_path, 'wb') as file:
        file.write(decrypted_data)
    return decrypted_file_path

api = HfApi(token='$token')
try:
    files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
    backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]

    if not backup_files:
        print('No backup files found')
        sys.exit()

    latest_backup = sorted(backup_files)[-1]

    with tempfile.TemporaryDirectory() as temp_dir:
        # Download the encrypted backup archive
        encrypted_filepath = api.hf_hub_download(
            repo_id='$repo_id',
            filename=latest_backup,
            repo_type='dataset',
            local_dir=temp_dir
        )

        if encrypted_filepath and os.path.exists(encrypted_filepath):
            # Derive the decryption key
            key = generate_key('$encryption_key')

            # Decrypt the archive
            decrypted_filepath = decrypt_file(encrypted_filepath, key)

            # Extract into the Cloudreve directory
            with tarfile.open(decrypted_filepath, 'r:gz') as tar:
                tar.extractall('/opt/cloudreve')

            print(f'Successfully restored backup from {latest_backup}')

            # Clean up the decrypted archive
            os.remove(decrypted_filepath)

except Exception as e:
    print(f'Error downloading backup: {str(e)}')
"
}

# Restore the latest backup on first start
echo "Checking for latest backup from HuggingFace..."
download_latest_backup

# Periodic backup loop: the first iteration backs up immediately,
# later iterations wait BACKUP_DELAY seconds between backups.
sync_data() {
    local initial_backup_done=false

    while true; do
        echo "Starting sync process at $(date)"

        if [ "$initial_backup_done" = "true" ]; then
            echo "Waiting for ${BACKUP_DELAY} seconds before the next backup."
            sleep "$BACKUP_DELAY"
        fi

        if [ -d /opt/cloudreve ]; then
            timestamp=$(date +%Y%m%d_%H%M%S)
            backup_file="cloudreve_backup_${timestamp}.tar.gz"

            # Archive the entire Cloudreve directory
            tar -czf "/tmp/${backup_file}" -C /opt/cloudreve .

            echo "Uploading backup to HuggingFace..."
            upload_backup "/tmp/${backup_file}" "${backup_file}"

            rm -f "/tmp/${backup_file}"
        else
            echo "Cloudreve directory does not exist yet, waiting..."
            sleep 60  # Retry after 60 seconds if the directory is missing
        fi
        initial_backup_done=true
    done
}

# Run the sync loop in the background
sync_data &

# Start Cloudreve
exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
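
If a backup ever needs to be recovered by hand, the archive can be decrypted outside the container. A minimal sketch, assuming the same PBKDF2 parameters and salt as sync_data.sh; the backup filename is a hypothetical placeholder:

    # manual_restore.py - decrypt a downloaded backup outside the container
    import base64
    import getpass
    from cryptography.fernet import Fernet
    from cryptography.hazmat.primitives import hashes
    from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

    # Same derivation as sync_data.sh: PBKDF2-HMAC-SHA256,
    # salt b'cloudreve_salt', 100000 iterations
    kdf = PBKDF2HMAC(algorithm=hashes.SHA256(), length=32,
                     salt=b'cloudreve_salt', iterations=100000)
    key = base64.urlsafe_b64encode(
        kdf.derive(getpass.getpass('Passphrase: ').encode()))

    # Hypothetical backup filename; substitute the real one
    name = 'cloudreve_backup_20240101_000000.tar.gz'
    with open(name + '.enc', 'rb') as f:
        data = Fernet(key).decrypt(f.read())
    with open(name, 'wb') as f:
        f.write(data)  # then: tar -xzf <name> -C /opt/cloudreve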