flzta committed on
Commit 752618a · verified · 1 Parent(s): efc3e07

Update sync_data.sh

Files changed (1)
  1. sync_data.sh +106 -148
sync_data.sh CHANGED
@@ -1,180 +1,162 @@
  #!/bin/bash

- # Check the Hugging Face Token and Dataset ID environment variables
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
      echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
      exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
      exit 0
  fi

  # Activate the virtual environment
  source /opt/venv/bin/activate

- # Define the Cloudreve main program directory
- CLOUDREVE_DIR="/opt/cloudreve"
- BACKUP_PREFIX="cloudreve_backup"
- HF_DATA_DIR="/opt/cloudreve/hf_uploaded" # Used to record files already uploaded to HF
-
- # Create the HF upload-record directory
- mkdir -p "$HF_DATA_DIR"
-
- # Python function: upload a file to the Hugging Face Dataset
- upload_file_to_dataset() {
-     local_file_path="$1"
-     relative_path=$(echo "$local_file_path" | sed "s|$CLOUDREVE_DIR/data/||")
-     token="$HF_TOKEN"
-     repo_id="$DATASET_ID"
-
-     echo "Preparing to upload file: $local_file_path to Dataset: $repo_id at path: $relative_path"
-
-     python3 -c "
- from huggingface_hub import HfApi
- import os
-
- api = HfApi(token='$token')
- try:
-     repo_id_val = os.environ.get('DATASET_ID')
-     print(f'Uploading file: '$local_file_path' to {repo_id_val} as '$relative_path'')
-     api.upload_file(
-         path_or_fileobj='$local_file_path',
-         path_in_repo='$relative_path',
-         repo_id=repo_id_val,
-         repo_type='dataset'
-     )
-     print(f'Successfully uploaded '$relative_path'')
- except Exception as e:
-     print(f'Error uploading file: {str(e)}')
- "
- }
-
- # Python function: upload a backup
  upload_backup() {
      file_path="$1"
      file_name="$2"
      token="$HF_TOKEN"
      repo_id="$DATASET_ID"
-
-     echo "Preparing to upload backup file: $file_path as $file_name to Dataset: $repo_id"

      python3 -c "
  from huggingface_hub import HfApi
  import sys
  import os
-
- print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
- print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
-
- def manage_backups(api, repo_id_val, max_files=50):
-     print('Managing old backups...')
-     files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
-     backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
      backup_files.sort()

      if len(backup_files) >= max_files:
-         print(f'Found {len(backup_files)} backup files, maximum allowed is {max_files}.')
          files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
          for file_to_delete in files_to_delete:
              try:
-                 print(f'Deleting old backup: {file_to_delete}')
-                 api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset')
-                 print(f'Successfully deleted: {file_to_delete}')
              except Exception as e:
                  print(f'Error deleting {file_to_delete}: {str(e)}')
-     else:
-         print('Number of backup files is within the limit.')
-
  api = HfApi(token='$token')
  try:
-     repo_id_val = os.environ.get('DATASET_ID') # Get the repo_id from the environment variable
-     print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
      api.upload_file(
-         path_or_fileobj='$file_path',
-         path_in_repo='$file_name',
-         repo_id=repo_id_val,
          repo_type='dataset'
      )
-     print(f'Successfully uploaded $file_name')
  except Exception as e:
      print(f'Error uploading file: {str(e)}')
  "
  }

- # Python function: download the latest backup
  download_latest_backup() {
-     token="$HF_TOKEN"
-     repo_id="$DATASET_ID"
-
-     echo "Preparing to download the latest backup from Dataset: $repo_id"

-     python3 -c "
  from huggingface_hub import HfApi
  import sys
  import os
  import tarfile
  import tempfile
-
- print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
- print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
-
  api = HfApi(token='$token')
  try:
-     repo_id_val = os.environ.get('DATASET_ID') # Get the repo_id from the environment variable
-     print(f'Listing files in Dataset: {repo_id_val}')
-     files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
-     backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]

      if not backup_files:
-         print('No backup files found in the Dataset.')
          sys.exit()

      latest_backup = sorted(backup_files)[-1]
-     print(f'Latest backup file found: {latest_backup}')

      with tempfile.TemporaryDirectory() as temp_dir:
-         filepath = api.hf_hub_download(
-             repo_id=repo_id_val,
              filename=latest_backup,
              repo_type='dataset',
              local_dir=temp_dir
          )

-         if filepath and os.path.exists(filepath):
-             print(f'Successfully downloaded backup to temporary directory: {filepath}')
-             print(\"Before restoring backup:\")
-             import subprocess
-             subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
-
-             # Delete the existing Cloudreve directory and configuration files
-             import shutil
-             cloudreve_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve\")
-             cloudreve_db_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve.db\")
-             config_ini_path = os.path.join(\"$CLOUDREVE_DIR\", \"config.ini\")
-             data_path = os.path.join(\"$CLOUDREVE_DIR\", \"data\")
-
-             if os.path.exists(cloudreve_path):
-                 print(f'Deleting: {cloudreve_path}')
-                 shutil.rmtree(cloudreve_path, ignore_errors=True)
-             if os.path.exists(cloudreve_db_path):
-                 print(f'Deleting: {cloudreve_db_path}')
-                 os.remove(cloudreve_db_path)
-             if os.path.exists(config_ini_path):
-                 print(f'Deleting: {config_ini_path}')
-                 os.remove(config_ini_path)
-             if os.path.exists(data_path):
-                 print(f'Deleting: {data_path}')
-                 shutil.rmtree(data_path, ignore_errors=True)
-             print(\"Deletion complete.\")
-
-             print(f'Extracting backup archive: {filepath} to $CLOUDREVE_DIR')
-             import tarfile
-             with tarfile.open(filepath, 'r:gz') as tar:
-                 tar.extractall(\"$CLOUDREVE_DIR\")
              print(f'Successfully restored backup from {latest_backup}')

-             print(\"After restoring backup:\")
-             subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
-         else:
-             print('Error during file download.')

  except Exception as e:
      print(f'Error downloading backup: {str(e)}')
@@ -182,61 +164,37 @@ except Exception as e:
  }

  # Download the latest backup on first startup
- echo "Downloading latest backup from HuggingFace..."
  download_latest_backup

  # Sync function
  sync_data() {
-     echo "SYNC_DATA FUNCTION IS RUNNING"
      while true; do
          echo "Starting sync process at $(date)"

-         # Check the Cloudreve data directory and upload new files
-         if [ -d "$CLOUDREVE_DIR/data" ]; then
-             find "$CLOUDREVE_DIR/data" -type f -print0 | while IFS= read -r -d $'\0' file; do
-                 if [ ! -f "$HF_DATA_DIR/$(basename "$file")" ]; then
-                     echo "New file found: $file"
-                     upload_file_to_dataset "$file"
-                     touch "$HF_DATA_DIR/$(basename "$file")" # Create an uploaded marker
-                 fi
-             done
-         fi
-
-         if [ -d "$CLOUDREVE_DIR" ]; then
-             echo "Before compression:"
-             ls -l \"$CLOUDREVE_DIR\"
-
              timestamp=$(date +%Y%m%d_%H%M%S)
-             backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
-             backup_path="/tmp/${backup_file}"

-             echo "Compressing Cloudreve directory (including database and config) to: $backup_path"
-             tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
-             echo "Compression complete."
-
-             echo "After compression:"
-             ls -l "$backup_path"

              echo "Uploading backup to HuggingFace..."
-             upload_backup "$backup_path" "${backup_file}"

-             rm -f "$backup_path"
          else
              echo "Cloudreve directory does not exist yet, waiting for next sync..."
          fi

-         SYNC_INTERVAL=${SYNC_INTERVAL:-300} # Default sync interval changed to 5 minutes
          echo "Next sync in ${SYNC_INTERVAL} seconds..."
          sleep $SYNC_INTERVAL
      done
  }

- # Delay starting the sync script to give Cloudreve some time to start
- sleep 30
-
  # Start the sync process in the background
  sync_data &

- # Start Cloudreve (Cloudreve needs to be started here)
- echo "Starting Cloudreve..."
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
 
  #!/bin/bash

+ # Check environment variables
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
      echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
      exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
      exit 0
  fi

+ # Set the decryption key (be sure to set a long, random string)
+ ENCRYPTION_KEY=${ENCRYPTION_KEY:-"set your encryption key here - a long, random string"}
+
  # Activate the virtual environment
  source /opt/venv/bin/activate

+ # Upload a backup
  upload_backup() {
      file_path="$1"
      file_name="$2"
      token="$HF_TOKEN"
      repo_id="$DATASET_ID"
+     encryption_key="$ENCRYPTION_KEY"

      python3 -c "
  from huggingface_hub import HfApi
  import sys
  import os
+ import base64
+ from cryptography.fernet import Fernet
+ from cryptography.hazmat.primitives import hashes
+ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
+ import io
+ def generate_key(password, salt=b'cloudreve_salt'):
+     kdf = PBKDF2HMAC(
+         algorithm=hashes.SHA256(),
+         length=32,
+         salt=salt,
+         iterations=100000,
+     )
+     key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
+     return key
+ def encrypt_file(file_path, key):
+     f = Fernet(key)
+     with open(file_path, 'rb') as file:
+         file_data = file.read()
+     encrypted_data = f.encrypt(file_data)
+     encrypted_file_path = file_path + '.enc'
+     with open(encrypted_file_path, 'wb') as file:
+         file.write(encrypted_data)
+     return encrypted_file_path
+ def manage_backups(api, repo_id, max_files=10):
+     files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
+     backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]
      backup_files.sort()

      if len(backup_files) >= max_files:
          files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
          for file_to_delete in files_to_delete:
              try:
+                 api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
+                 print(f'Deleted old backup: {file_to_delete}')
              except Exception as e:
                  print(f'Error deleting {file_to_delete}: {str(e)}')
  api = HfApi(token='$token')
  try:
+     # Generate the encryption key
+     key = generate_key('$encryption_key')
+
+     # Encrypt the file
+     encrypted_file_path = encrypt_file('$file_path', key)
+
+     # Upload the encrypted file
      api.upload_file(
+         path_or_fileobj=encrypted_file_path,
+         path_in_repo='$file_name.enc',
+         repo_id='$repo_id',
          repo_type='dataset'
      )
+     print(f'Successfully uploaded encrypted $file_name')
+
+     # Delete the temporary encrypted file
+     os.remove(encrypted_file_path)
+
+     # Manage the number of backup files
+     manage_backups(api, '$repo_id')
  except Exception as e:
      print(f'Error uploading file: {str(e)}')
  "
  }

+ # Download the latest backup
  download_latest_backup() {
+     token="$HF_TOKEN"
+     repo_id="$DATASET_ID"
+     encryption_key="$ENCRYPTION_KEY"

+     python3 -c "
  from huggingface_hub import HfApi
  import sys
  import os
  import tarfile
  import tempfile
+ import base64
+ from cryptography.fernet import Fernet
+ from cryptography.hazmat.primitives import hashes
+ from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
+ def generate_key(password, salt=b'cloudreve_salt'):
+     kdf = PBKDF2HMAC(
+         algorithm=hashes.SHA256(),
+         length=32,
+         salt=salt,
+         iterations=100000,
+     )
+     key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
+     return key
+ def decrypt_file(encrypted_file_path, key):
+     f = Fernet(key)
+     with open(encrypted_file_path, 'rb') as file:
+         encrypted_data = file.read()
+     decrypted_data = f.decrypt(encrypted_data)
+     decrypted_file_path = encrypted_file_path[:-4] # Remove the .enc suffix
+     with open(decrypted_file_path, 'wb') as file:
+         file.write(decrypted_data)
+     return decrypted_file_path
  api = HfApi(token='$token')
  try:
+     files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
+     backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]

      if not backup_files:
+         print('No backup files found')
          sys.exit()

      latest_backup = sorted(backup_files)[-1]

      with tempfile.TemporaryDirectory() as temp_dir:
+         # Download the encrypted backup file
+         encrypted_filepath = api.hf_hub_download(
+             repo_id='$repo_id',
              filename=latest_backup,
              repo_type='dataset',
              local_dir=temp_dir
          )

+         if encrypted_filepath and os.path.exists(encrypted_filepath):
+             # Generate the decryption key
+             key = generate_key('$encryption_key')
+
+             # Decrypt the file
+             decrypted_filepath = decrypt_file(encrypted_filepath, key)
+
+             # Extract to the target directory
+             with tarfile.open(decrypted_filepath, 'r:gz') as tar:
+                 tar.extractall('/opt/cloudreve')
+
              print(f'Successfully restored backup from {latest_backup}')

+             # Clean up temporary files
+             os.remove(decrypted_filepath)

  except Exception as e:
      print(f'Error downloading backup: {str(e)}')
  }

  # Download the latest backup on first startup
+ echo "Checking for latest backup from HuggingFace..."
  download_latest_backup

  # Sync function
  sync_data() {
      while true; do
          echo "Starting sync process at $(date)"

+         if [ -d /opt/cloudreve ]; then
              timestamp=$(date +%Y%m%d_%H%M%S)
+             backup_file="cloudreve_backup_${timestamp}.tar.gz"

+             # Compress the entire Cloudreve directory
+             tar -czf "/tmp/${backup_file}" -C /opt/cloudreve .

              echo "Uploading backup to HuggingFace..."
+             upload_backup "/tmp/${backup_file}" "${backup_file}"

+             rm -f "/tmp/${backup_file}"
          else
              echo "Cloudreve directory does not exist yet, waiting for next sync..."
          fi

+         SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # Default sync interval changed to 2 hours
          echo "Next sync in ${SYNC_INTERVAL} seconds..."
          sleep $SYNC_INTERVAL
      done
  }

  # Start the sync process in the background
  sync_data &

+ # Start Cloudreve
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
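
In the updated script, each backup tarball is encrypted client-side before upload: a Fernet key is derived from ENCRYPTION_KEY via PBKDF2-HMAC-SHA256 with a fixed salt (b'cloudreve_salt') and 100000 iterations, so the dataset only ever stores *.tar.gz.enc files and a restore needs nothing beyond the same ENCRYPTION_KEY. A minimal round-trip sketch of that scheme, assuming the cryptography package is installed; the passphrase below is illustrative, not a real key:

import base64
from cryptography.fernet import Fernet
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC

def generate_key(password, salt=b'cloudreve_salt'):
    # Same derivation as sync_data.sh: PBKDF2-HMAC-SHA256 producing a
    # 32-byte key, urlsafe-base64-encoded because Fernet requires it.
    kdf = PBKDF2HMAC(algorithm=hashes.SHA256(), length=32, salt=salt, iterations=100000)
    return base64.urlsafe_b64encode(kdf.derive(password.encode()))

key = generate_key('example-passphrase')  # illustrative value only
f = Fernet(key)
ciphertext = f.encrypt(b'backup archive bytes')
assert f.decrypt(ciphertext) == b'backup archive bytes'
print('round trip OK')

Because the salt and iteration count are hard-coded, the same ENCRYPTION_KEY always derives the same Fernet key, which is what lets download_latest_backup decrypt archives produced by earlier runs; the trade-off is that per-backup random salts are forgone.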