flzta committed on
Commit
c61036b
·
verified ·
1 Parent(s): 21f54af

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +210 -68
sync_data.sh CHANGED
@@ -2,18 +2,27 @@
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
- echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
 
 
6
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
7
- exit 0
8
  fi
9
 
10
- # 激活虚拟环境
 
11
  source /opt/venv/bin/activate
12
 
13
- # 定义 Cloudreve 主程序目录
14
  CLOUDREVE_DIR="/opt/cloudreve"
15
  BACKUP_PREFIX="cloudreve_backup"
 
 
 
16
 
 
 
 
17
  # Python 函数: 上传备份
18
  upload_backup() {
19
  file_path="$1"
@@ -49,6 +58,8 @@ def manage_backups(api, repo_id_val, max_files=5):
49
  api = HfApi(token='$token')
50
  try:
51
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
 
 
52
  print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
53
  api.upload_file(
54
  path_or_fileobj='$file_path',
@@ -60,6 +71,7 @@ try:
60
  manage_backups(api, repo_id_val)
61
  except Exception as e:
62
  print(f'Error uploading file: {str(e)}')
 
63
  "
64
  }
65
 
@@ -71,110 +83,240 @@ download_latest_backup() {
71
  echo "Preparing to download the latest backup from Dataset: $repo_id"
72
 
73
  python3 -c "
74
- from huggingface_hub import HfApi
75
  import sys
76
  import os
77
  import tarfile
78
  import tempfile
 
 
 
79
  print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
80
  print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
 
81
  api = HfApi(token='$token')
82
  try:
83
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
 
 
 
84
  print(f'Listing files in Dataset: {repo_id_val}')
85
  files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
86
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
 
87
  if not backup_files:
88
- print('No backup files found in the Dataset.')
89
- sys.exit()
 
90
  latest_backup = sorted(backup_files)[-1]
91
  print(f'Latest backup file found: {latest_backup}')
 
92
  with tempfile.TemporaryDirectory() as temp_dir:
93
- filepath = api.hf_hub_download(
94
- repo_id=repo_id_val,
95
- filename=latest_backup,
96
- repo_type='dataset',
97
- local_dir=temp_dir
98
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  if filepath and os.path.exists(filepath):
100
  print(f'Successfully downloaded backup to temporary directory: {filepath}')
101
- print(\"Before restoring backup:\")
102
- import subprocess
103
- subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
104
- # 删除现有的 Cloudreve 目录和配置文件
105
- import shutil
106
- cloudreve_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve\")
107
- cloudreve_db_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve.db\")
108
- config_ini_path = os.path.join(\"$CLOUDREVE_DIR\", \"config.ini\")
109
- if os.path.exists(cloudreve_path):
110
- print(f'Deleting: {cloudreve_path}')
111
- shutil.rmtree(cloudreve_path, ignore_errors=True)
112
- if os.path.exists(cloudreve_db_path):
113
- print(f'Deleting: {cloudreve_db_path}')
114
- os.remove(cloudreve_db_path)
115
- if os.path.exists(config_ini_path):
116
- print(f'Deleting: {config_ini_path}')
117
- os.remove(config_ini_path)
118
- print(\"Deletion complete.\")
119
- print(f'Extracting backup archive: {filepath} to $CLOUDREVE_DIR')
120
- import tarfile
121
- with tarfile.open(filepath, 'r:gz') as tar:
122
- tar.extractall(\"$CLOUDREVE_DIR\")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  print(f'Successfully restored backup from {latest_backup}')
124
- print(\"After restoring backup:\")
125
- subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
126
  else:
127
- print('Error during file download.')
 
 
 
 
 
128
  except Exception as e:
129
- print(f'Error downloading backup: {str(e)}')
 
 
 
 
130
  "
131
  }
132
 
133
- # 首次启动时下载最新备份
134
- echo "Downloading latest backup from HuggingFace..."
135
- download_latest_backup
136
 
137
- # 同步函数
138
  sync_data() {
139
- echo "SYNC_DATA FUNCTION IS RUNNING" # 添加了这一行
140
  while true; do
141
- echo "Starting sync process at $(date)"
 
 
 
 
 
142
 
143
- if [ -d "$CLOUDREVE_DIR" ]; then
144
- echo "Before compression:"
145
- ls -l \"$CLOUDREVE_DIR\"
146
 
147
- timestamp=$(date +%Y%m%d_%H%M%S)
148
- backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
149
- backup_path="/tmp/${backup_file}"
 
150
 
151
- echo "Compressing Cloudreve directory (including database and config) to: $backup_path"
152
- tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
153
- echo "Compression complete."
154
-
155
- echo "After compression:"
156
- ls -l "$backup_path"
 
157
 
 
 
 
158
  echo "Uploading backup to HuggingFace..."
159
  upload_backup "$backup_path" "${backup_file}"
160
-
161
- rm -f "$backup_path"
 
 
 
 
 
162
  else
163
- echo "Cloudreve directory does not exist yet, waiting for next sync..."
 
164
  fi
165
 
166
- SYNC_INTERVAL=${SYNC_INTERVAL:-60} # 默认同步间隔改为 60
 
167
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
168
  sleep $SYNC_INTERVAL
169
  done
170
  }
171
 
172
- # 延迟启动同步脚本,给 Cloudreve 一些启动时间
173
- sleep 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- # 后台启动同步进程
176
- sync_data &
 
 
 
177
 
178
- # 启动 Halo (这里需要启动 Cloudreve)
179
- echo "Starting Cloudreve..."
180
- exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
 
 
 
 
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
+ echo "Starting Cloudreve without backup/restore functionality - missing HF_TOKEN or DATASET_ID"
6
+ # 直接启动 Cloudreve 作为主进程
7
+ echo "Starting Cloudreve directly..."
8
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
9
+ exit 0 # exec 通常不会返回,但加上 exit 0 以防万一
10
  fi
11
 
12
+ # 激活 Python 虚拟环境
13
+ echo "Activating Python venv..."
14
  source /opt/venv/bin/activate
15
 
16
+ # 定义 Cloudreve 主程序目录 和 备份文件前缀
17
  CLOUDREVE_DIR="/opt/cloudreve"
18
  BACKUP_PREFIX="cloudreve_backup"
19
+ CONFIG_FILE_PATH="/opt/cloudreve/config.ini"
20
+ DB_FILE_PATH="/opt/cloudreve/cloudreve.db"
21
+ EXECUTABLE_PATH="/opt/cloudreve/cloudreve"
22
 
23
+ # --- Python 函数定义 ---
24
+ # (Python 函数 upload_backup 和 download_latest_backup 保持不变,这里省略以减少篇幅)
25
+ # --- 请将你原始脚本中的 Python 函数 upload_backup 和 download_latest_backup 复制到这里 ---
26
  # Python 函数: 上传备份
27
  upload_backup() {
28
  file_path="$1"
 
58
  api = HfApi(token='$token')
59
  try:
60
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
61
+ if not repo_id_val:
62
+ raise ValueError('DATASET_ID environment variable is not set.')
63
  print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
64
  api.upload_file(
65
  path_or_fileobj='$file_path',
 
71
  manage_backups(api, repo_id_val)
72
  except Exception as e:
73
  print(f'Error uploading file: {str(e)}')
74
+ sys.exit(1) # Exit if upload fails
75
  "
76
  }
77
 
 
83
  echo "Preparing to download the latest backup from Dataset: $repo_id"
84
 
85
  python3 -c "
86
+ from huggingface_hub import HfApi, hf_hub_download
87
  import sys
88
  import os
89
  import tarfile
90
  import tempfile
91
+ import shutil
92
+ import subprocess
93
+
94
  print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
95
  print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
96
+
97
  api = HfApi(token='$token')
98
  try:
99
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
100
+ if not repo_id_val:
101
+ raise ValueError('DATASET_ID environment variable is not set.')
102
+
103
  print(f'Listing files in Dataset: {repo_id_val}')
104
  files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
105
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
106
+
107
  if not backup_files:
108
+ print('No backup files found in the Dataset. Skipping restore.')
109
+ sys.exit(0) # Exit successfully if no backups to restore
110
+
111
  latest_backup = sorted(backup_files)[-1]
112
  print(f'Latest backup file found: {latest_backup}')
113
+
114
  with tempfile.TemporaryDirectory() as temp_dir:
115
+ print(f'Downloading {latest_backup} to temporary directory {temp_dir}...')
116
+ try:
117
+ filepath = hf_hub_download(
118
+ repo_id=repo_id_val,
119
+ filename=latest_backup,
120
+ repo_type='dataset',
121
+ local_dir=temp_dir,
122
+ token=os.environ.get('HF_TOKEN') # Pass token explicitly if needed
123
+ )
124
+ except Exception as download_error:
125
+ print(f'Error during hf_hub_download: {download_error}')
126
+ # Attempt to list files again for debugging
127
+ try:
128
+ print('Attempting to list repo files again for debugging...')
129
+ files_debug = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
130
+ print(f'Files found (debug): {files_debug}')
131
+ except Exception as list_error:
132
+ print(f'Error listing files during debug: {list_error}')
133
+ sys.exit(1)
134
+
135
+
136
  if filepath and os.path.exists(filepath):
137
  print(f'Successfully downloaded backup to temporary directory: {filepath}')
138
+
139
+ # Files/Dirs to restore (relative paths within CLOUDREVE_DIR)
140
+ items_to_restore = ['cloudreve', 'cloudreve.db', 'config.ini']
141
+
142
+ # Ensure target directory exists
143
+ os.makedirs(\"$CLOUDREVE_DIR\", exist_ok=True)
144
+
145
+ print('Listing contents before restore:')
146
+ subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A to show hidden files
147
+
148
+ # --- Safer Restore Logic ---
149
+ # 1. Extract backup to a temporary location first
150
+ extract_temp_dir = os.path.join(temp_dir, 'extracted_backup')
151
+ os.makedirs(extract_temp_dir, exist_ok=True)
152
+ print(f'Extracting backup archive: {filepath} to {extract_temp_dir}')
153
+ try:
154
+ with tarfile.open(filepath, 'r:gz') as tar:
155
+ tar.extractall(extract_temp_dir)
156
+ print('Extraction complete.')
157
+ except tarfile.ReadError as tar_err:
158
+ print(f'Error reading tar file: {tar_err}')
159
+ sys.exit(1)
160
+ except Exception as extract_err:
161
+ print(f'Error during extraction: {extract_err}')
162
+ sys.exit(1)
163
+
164
+
165
+ # 2. Check if essential files exist in the extracted backup
166
+ essential_files_present = True
167
+ for item in items_to_restore:
168
+ extracted_item_path = os.path.join(extract_temp_dir, item)
169
+ if not os.path.exists(extracted_item_path):
170
+ print(f'Error: Essential item "{item}" not found in extracted backup at {extracted_item_path}. Aborting restore.')
171
+ essential_files_present = False
172
+ break # Stop checking
173
+
174
+ if not essential_files_present:
175
+ sys.exit(1) # Abort if essential files are missing
176
+
177
+ # 3. Delete existing items in the target directory
178
+ print(f'Deleting existing items in $CLOUDREVE_DIR before restoring...')
179
+ for item in items_to_restore:
180
+ target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
181
+ if os.path.exists(target_path):
182
+ try:
183
+ if os.path.isdir(target_path) and not os.path.islink(target_path):
184
+ print(f'Deleting directory: {target_path}')
185
+ shutil.rmtree(target_path)
186
+ else:
187
+ print(f'Deleting file/link: {target_path}')
188
+ os.remove(target_path)
189
+ except OSError as e:
190
+ print(f'Error deleting {target_path}: {e}. Continuing...')
191
+
192
+
193
+ # 4. Move extracted items to the target directory
194
+ print(f'Moving extracted items from {extract_temp_dir} to $CLOUDREVE_DIR...')
195
+ for item in items_to_restore:
196
+ source_path = os.path.join(extract_temp_dir, item)
197
+ target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
198
+ try:
199
+ print(f'Moving {source_path} to {target_path}')
200
+ shutil.move(source_path, target_path)
201
+ except Exception as move_err:
202
+ print(f'Error moving {item}: {move_err}')
203
+ # Decide if this is critical, maybe exit? For now, print and continue.
204
+
205
+
206
  print(f'Successfully restored backup from {latest_backup}')
207
+ print('Listing contents after restore:')
208
+ subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A
209
  else:
210
+ print(f'Error: Downloaded file path "{filepath}" does not exist or download failed.')
211
+ sys.exit(1) # Exit if download path invalid
212
+
213
+ except ValueError as ve:
214
+ print(f'Configuration Error: {ve}')
215
+ sys.exit(1)
216
  except Exception as e:
217
+ print(f'Error during backup download/restore: {str(e)}')
218
+ # Print traceback for more details
219
+ import traceback
220
+ traceback.print_exc()
221
+ sys.exit(1) # Exit on error
222
  "
223
  }
224
 
 
 
 
225
 
226
+ # --- Sync Function ---
227
  sync_data() {
228
+ echo "Background Sync Process Started"
229
  while true; do
230
+ # Wait for initial Cloudreve setup potentially creating db/config if first run
231
+ # Also wait if essential files are missing before attempting backup
232
+ while [ ! -f "$CONFIG_FILE_PATH" ] || [ ! -f "$DB_FILE_PATH" ] || [ ! -f "$EXECUTABLE_PATH" ]; do
233
+ echo "Waiting for essential Cloudreve files (config.ini, cloudreve.db, cloudreve) to exist before backup attempt..."
234
+ sleep 15
235
+ done
236
 
237
+ echo "Starting sync cycle at $(date)"
 
 
238
 
239
+ # Define backup path and name
240
+ timestamp=$(date +%Y%m%d_%H%M%S)
241
+ backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
242
+ backup_path="/tmp/${backup_file}" # Use /tmp for temporary files
243
 
244
+ echo "Compressing Cloudreve data (executable, db, config) to: $backup_path"
245
+ # Use -C to change directory, ensuring archive paths are relative
246
+ # Only include the executable, db, and config file
247
+ tar -czf "$backup_path" -C "$CLOUDREVE_DIR" \
248
+ $(basename "$EXECUTABLE_PATH") \
249
+ $(basename "$DB_FILE_PATH") \
250
+ $(basename "$CONFIG_FILE_PATH")
251
 
252
+ # Check if compression was successful (file exists and is not empty)
253
+ if [ -s "$backup_path" ]; then
254
+ echo "Compression complete. File size: $(ls -lh "$backup_path" | awk '{print $5}')"
255
  echo "Uploading backup to HuggingFace..."
256
  upload_backup "$backup_path" "${backup_file}"
257
+ # Check exit status of upload_backup? The python script should exit non-zero on failure.
258
+ if [ $? -ne 0 ]; then
259
+ echo "Backup upload failed. Keeping local archive: $backup_path"
260
+ else
261
+ echo "Upload successful. Removing local archive."
262
+ rm -f "$backup_path"
263
+ fi
264
  else
265
+ echo "Compression failed or created an empty file. Skipping upload."
266
+ rm -f "$backup_path" # Remove potentially empty/corrupt file
267
  fi
268
 
269
+ # Define sync interval (use environment variable or default to 3600 seconds = 1 hour)
270
+ SYNC_INTERVAL=${SYNC_INTERVAL:-3600}
271
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
272
  sleep $SYNC_INTERVAL
273
  done
274
  }
275
 
276
+ # --- Main Execution ---
277
+
278
+ # 1. Attempt to restore from the latest backup on startup
279
+ echo "Attempting to restore latest backup from HuggingFace..."
280
+ download_latest_backup
281
+ # Check exit code? If restore fails critically, maybe don't start?
282
+ # The python script now exits non-zero on critical errors.
283
+ if [ $? -ne 0 ]; then
284
+ echo "CRITICAL: Backup restoration failed. Exiting."
285
+ exit 1
286
+ fi
287
+ echo "Backup restore process finished."
288
+
289
+
290
+ # 2. Check if config file exists after potential restore. If not, Cloudreve needs to run once to create it.
291
+ if [ ! -f "$CONFIG_FILE_PATH" ]; then
292
+ echo "Config file ($CONFIG_FILE_PATH) not found. Running Cloudreve once to generate initial config."
293
+ /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
294
+ # Cloudreve will print initial password and exit (or wait for setup if web setup enabled)
295
+ # Need to check if it actually created the config...
296
+ if [ ! -f "$CONFIG_FILE_PATH" ]; then
297
+ echo "CRITICAL: Cloudreve failed to create initial config file. Exiting."
298
+ exit 1
299
+ else
300
+ echo "Initial config file created. Please check logs for admin credentials if needed."
301
+ # Consider stopping here or adding a pause? For automated deployment, continue.
302
+ fi
303
+ fi
304
+
305
+
306
+ # 3. Start the background sync process
307
+ echo "Starting background data sync..."
308
+ sync_data & # Run sync_data function in the background
309
+ sync_pid=$! # Get PID of background sync process
310
 
311
+ # 4. Start Cloudreve in the foreground using exec
312
+ # 'exec' replaces the current shell process with the Cloudreve process.
313
+ # This makes Cloudreve the main process of the container.
314
+ echo "Starting Cloudreve application as the main process..."
315
+ exec /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
316
 
317
+ # If exec fails, the script continues here.
318
+ exec_failed_code=$?
319
+ echo "CRITICAL: Failed to execute Cloudreve. Exit code: $exec_failed_code"
320
+ # Attempt to kill the background sync process if exec failed
321
+ kill $sync_pid 2>/dev/null
322
+ exit $exec_failed_code