flzta committed on
Commit
e28913e
·
verified ·
1 Parent(s): 9e9f51a

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +93 -113
sync_data.sh CHANGED
@@ -1,200 +1,180 @@
1
  #!/bin/bash
2
 
3
- # 检查环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
6
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
7
  exit 0
8
  fi
9
 
10
- # 设置解密密钥 (请务必设置一个长且随机的字符串)
11
- ENCRYPTION_KEY=${ENCRYPTION_KEY:-"请在此处设置您的加密密钥,这是一个长且随机的字符串"}
12
-
13
  # 激活虚拟环境
14
  source /opt/venv/bin/activate
15
 
16
- # 上传备份
 
 
 
 
17
  upload_backup() {
18
  file_path="$1"
19
  file_name="$2"
20
  token="$HF_TOKEN"
21
  repo_id="$DATASET_ID"
22
- encryption_key="$ENCRYPTION_KEY"
 
23
 
24
  python3 -c "
25
  from huggingface_hub import HfApi
26
  import sys
27
  import os
28
- import base64
29
- from cryptography.fernet import Fernet
30
- from cryptography.hazmat.primitives import hashes
31
- from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
32
- import io
33
- def generate_key(password, salt=b'cloudreve_salt'):
34
- kdf = PBKDF2HMAC(
35
- algorithm=hashes.SHA256(),
36
- length=32,
37
- salt=salt,
38
- iterations=100000,
39
- )
40
- key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
41
- return key
42
- def encrypt_file(file_path, key):
43
- f = Fernet(key)
44
- with open(file_path, 'rb') as file:
45
- file_data = file.read()
46
- encrypted_data = f.encrypt(file_data)
47
- encrypted_file_path = file_path + '.enc'
48
- with open(encrypted_file_path, 'wb') as file:
49
- file.write(encrypted_data)
50
- return encrypted_file_path
51
- def manage_backups(api, repo_id, max_files=10):
52
- files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
53
- backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]
54
  backup_files.sort()
55
-
56
  if len(backup_files) >= max_files:
 
57
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
58
  for file_to_delete in files_to_delete:
59
  try:
60
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id, repo_type='dataset')
61
- print(f'Deleted old backup: {file_to_delete}')
 
62
  except Exception as e:
63
  print(f'Error deleting {file_to_delete}: {str(e)}')
 
 
64
  api = HfApi(token='$token')
65
  try:
66
- # 生成加密密钥
67
- key = generate_key('$encryption_key')
68
-
69
- # 加密文件
70
- encrypted_file_path = encrypt_file('$file_path', key)
71
-
72
- # 上传加密文件
73
  api.upload_file(
74
- path_or_fileobj=encrypted_file_path,
75
- path_in_repo='$file_name.enc',
76
- repo_id='$repo_id',
77
  repo_type='dataset'
78
  )
79
- print(f'Successfully uploaded encrypted $file_name')
80
-
81
- # 删除临时加密文件
82
- os.remove(encrypted_file_path)
83
-
84
- # 管理备份文件数量
85
- manage_backups(api, '$repo_id')
86
  except Exception as e:
87
  print(f'Error uploading file: {str(e)}')
88
  "
89
  }
90
 
91
- # 下载最新备份
92
  download_latest_backup() {
93
- token="$HF_TOKEN"
94
- repo_id="$DATASET_ID"
95
- encryption_key="$ENCRYPTION_KEY"
96
 
97
- python3 -c "
 
 
98
  from huggingface_hub import HfApi
99
  import sys
100
  import os
101
  import tarfile
102
  import tempfile
103
- import base64
104
- from cryptography.fernet import Fernet
105
- from cryptography.hazmat.primitives import hashes
106
- from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
107
- def generate_key(password, salt=b'cloudreve_salt'):
108
- kdf = PBKDF2HMAC(
109
- algorithm=hashes.SHA256(),
110
- length=32,
111
- salt=salt,
112
- iterations=100000,
113
- )
114
- key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
115
- return key
116
- def decrypt_file(encrypted_file_path, key):
117
- f = Fernet(key)
118
- with open(encrypted_file_path, 'rb') as file:
119
- encrypted_data = file.read()
120
- decrypted_data = f.decrypt(encrypted_data)
121
- decrypted_file_path = encrypted_file_path[:-4] # 移除 .enc 后缀
122
- with open(decrypted_file_path, 'wb') as file:
123
- file.write(decrypted_data)
124
- return decrypted_file_path
125
  api = HfApi(token='$token')
126
  try:
127
- files = api.list_repo_files(repo_id='$repo_id', repo_type='dataset')
128
- backup_files = [f for f in files if f.startswith('cloudreve_backup_') and f.endswith('.tar.gz.enc')]
129
-
 
130
  if not backup_files:
131
- print('No backup files found')
132
  sys.exit()
133
-
134
  latest_backup = sorted(backup_files)[-1]
135
-
136
  with tempfile.TemporaryDirectory() as temp_dir:
137
- # 下载加密的备份文件
138
- encrypted_filepath = api.hf_hub_download(
139
- repo_id='$repo_id',
140
  filename=latest_backup,
141
  repo_type='dataset',
142
  local_dir=temp_dir
143
  )
144
-
145
- if encrypted_filepath and os.path.exists(encrypted_filepath):
146
- # 生成解密密钥
147
- key = generate_key('$encryption_key')
148
-
149
- # 解密文件
150
- decrypted_filepath = decrypt_file(encrypted_filepath, key)
151
-
152
- # 解压缩到目标目录
153
- with tarfile.open(decrypted_filepath, 'r:gz') as tar:
154
- tar.extractall('/opt/cloudreve')
155
-
 
 
 
 
 
 
 
 
 
 
 
 
156
  print(f'Successfully restored backup from {latest_backup}')
157
-
158
- # 清理临时文件
159
- os.remove(decrypted_filepath)
160
-
161
  except Exception as e:
162
  print(f'Error downloading backup: {str(e)}')
163
  "
164
  }
165
 
166
  # 首次启动时下载最新备份
167
- echo "Checking for latest backup from HuggingFace..."
168
  download_latest_backup
169
 
170
  # 同步函数
171
  sync_data() {
 
172
  while true; do
173
  echo "Starting sync process at $(date)"
174
 
175
- if [ -d /opt/cloudreve ]; then
 
 
 
176
  timestamp=$(date +%Y%m%d_%H%M%S)
177
- backup_file="cloudreve_backup_${timestamp}.tar.gz"
 
 
 
 
 
178
 
179
- # 压缩整个 Cloudreve 目录
180
- tar -czf "/tmp/${backup_file}" -C /opt/cloudreve .
181
 
182
  echo "Uploading backup to HuggingFace..."
183
- upload_backup "/tmp/${backup_file}" "${backup_file}"
184
 
185
- rm -f "/tmp/${backup_file}"
186
  else
187
  echo "Cloudreve directory does not exist yet, waiting for next sync..."
188
  fi
189
 
190
- SYNC_INTERVAL=${SYNC_INTERVAL:-7200} # 默认同步间隔改为 2 小时
191
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
192
  sleep $SYNC_INTERVAL
193
  done
194
  }
195
 
 
 
 
196
  # 后台启动同步进程
197
  sync_data &
198
 
199
- # 启动 Cloudreve
 
200
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
 
1
  #!/bin/bash
2
 
3
+ # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
6
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
7
  exit 0
8
  fi
9
 
 
 
 
10
  # 激活虚拟环境
11
  source /opt/venv/bin/activate
12
 
13
+ # 定义 Cloudreve 主程序目录
14
+ CLOUDREVE_DIR="/opt/cloudreve"
15
+ BACKUP_PREFIX="cloudreve_backup"
16
+
17
+ # Python 函数: 上传备份
18
  upload_backup() {
19
  file_path="$1"
20
  file_name="$2"
21
  token="$HF_TOKEN"
22
  repo_id="$DATASET_ID"
23
+
24
+ echo "Preparing to upload backup file: $file_path as $file_name to Dataset: $repo_id"
25
 
26
  python3 -c "
27
  from huggingface_hub import HfApi
28
  import sys
29
  import os
30
+ print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
31
+ print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
32
+ def manage_backups(api, repo_id_val, max_files=5):
33
+ print('Managing old backups...')
34
+ files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
35
+ backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  backup_files.sort()
 
37
  if len(backup_files) >= max_files:
38
+ print(f'Found {len(backup_files)} backup files, maximum allowed is {max_files}.')
39
  files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
40
  for file_to_delete in files_to_delete:
41
  try:
42
+ print(f'Deleting old backup: {file_to_delete}')
43
+ api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset')
44
+ print(f'Successfully deleted: {file_to_delete}')
45
  except Exception as e:
46
  print(f'Error deleting {file_to_delete}: {str(e)}')
47
+ else:
48
+ print('Number of backup files is within the limit.')
49
  api = HfApi(token='$token')
50
  try:
51
+ repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
52
+ print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
 
 
 
 
 
53
  api.upload_file(
54
+ path_or_fileobj='$file_path',
55
+ path_in_repo='$file_name',
56
+ repo_id=repo_id_val,
57
  repo_type='dataset'
58
  )
59
+ print(f'Successfully uploaded $file_name')
60
+ manage_backups(api, repo_id_val)
 
 
 
 
 
61
  except Exception as e:
62
  print(f'Error uploading file: {str(e)}')
63
  "
64
  }
65
 
66
+ # Python 函数: 下载最新备份
67
  download_latest_backup() {
68
+ token="$HF_TOKEN"
69
+ repo_id="$DATASET_ID"
 
70
 
71
+ echo "Preparing to download the latest backup from Dataset: $repo_id"
72
+
73
+ python3 -c "
74
  from huggingface_hub import HfApi
75
  import sys
76
  import os
77
  import tarfile
78
  import tempfile
79
+ print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
80
+ print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  api = HfApi(token='$token')
82
  try:
83
+ repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
84
+ print(f'Listing files in Dataset: {repo_id_val}')
85
+ files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
86
+ backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
87
  if not backup_files:
88
+ print('No backup files found in the Dataset.')
89
  sys.exit()
 
90
  latest_backup = sorted(backup_files)[-1]
91
+ print(f'Latest backup file found: {latest_backup}')
92
  with tempfile.TemporaryDirectory() as temp_dir:
93
+ filepath = api.hf_hub_download(
94
+ repo_id=repo_id_val,
 
95
  filename=latest_backup,
96
  repo_type='dataset',
97
  local_dir=temp_dir
98
  )
99
+ if filepath and os.path.exists(filepath):
100
+ print(f'Successfully downloaded backup to temporary directory: {filepath}')
101
+ print(\"Before restoring backup:\")
102
+ import subprocess
103
+ subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
104
+ # 删除现有的 Cloudreve 目录和配置文件
105
+ import shutil
106
+ cloudreve_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve\")
107
+ cloudreve_db_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve.db\")
108
+ config_ini_path = os.path.join(\"$CLOUDREVE_DIR\", \"config.ini\")
109
+ if os.path.exists(cloudreve_path):
110
+ print(f'Deleting: {cloudreve_path}')
111
+ shutil.rmtree(cloudreve_path, ignore_errors=True)
112
+ if os.path.exists(cloudreve_db_path):
113
+ print(f'Deleting: {cloudreve_db_path}')
114
+ os.remove(cloudreve_db_path)
115
+ if os.path.exists(config_ini_path):
116
+ print(f'Deleting: {config_ini_path}')
117
+ os.remove(config_ini_path)
118
+ print(\"Deletion complete.\")
119
+ print(f'Extracting backup archive: {filepath} to $CLOUDREVE_DIR')
120
+ import tarfile
121
+ with tarfile.open(filepath, 'r:gz') as tar:
122
+ tar.extractall(\"$CLOUDREVE_DIR\")
123
  print(f'Successfully restored backup from {latest_backup}')
124
+ print(\"After restoring backup:\")
125
+ subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
126
+ else:
127
+ print('Error during file download.')
128
  except Exception as e:
129
  print(f'Error downloading backup: {str(e)}')
130
  "
131
  }
132
 
133
  # 首次启动时下载最新备份
134
+ echo "Downloading latest backup from HuggingFace..."
135
  download_latest_backup
136
 
137
  # 同步函数
138
  sync_data() {
139
+ echo "SYNC_DATA FUNCTION IS RUNNING" # 添加了这一行
140
  while true; do
141
  echo "Starting sync process at $(date)"
142
 
143
+ if [ -d "$CLOUDREVE_DIR" ]; then
144
+ echo "Before compression:"
145
+ ls -l \"$CLOUDREVE_DIR\"
146
+
147
  timestamp=$(date +%Y%m%d_%H%M%S)
148
+ backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
149
+ backup_path="/tmp/${backup_file}"
150
+
151
+ echo "Compressing Cloudreve directory (including database and config) to: $backup_path"
152
+ tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
153
+ echo "Compression complete."
154
 
155
+ echo "After compression:"
156
+ ls -l "$backup_path"
157
 
158
  echo "Uploading backup to HuggingFace..."
159
+ upload_backup "$backup_path" "${backup_file}"
160
 
161
+ rm -f "$backup_path"
162
  else
163
  echo "Cloudreve directory does not exist yet, waiting for next sync..."
164
  fi
165
 
166
+ SYNC_INTERVAL=${SYNC_INTERVAL:-60} # 默认同步间隔改为 60
167
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
168
  sleep $SYNC_INTERVAL
169
  done
170
  }
171
 
172
+ # 延迟启动同步脚本,给 Cloudreve 一些启动时间
173
+ sleep 10
174
+
175
  # 后台启动同步进程
176
  sync_data &
177
 
178
+ # 启动 Halo (这里需要启动 Cloudreve)
179
+ echo "Starting Cloudreve..."
180
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini