flzta committed on
Commit
37c78bc
·
verified ·
1 Parent(s): f34c42c

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +47 -213
sync_data.sh CHANGED
@@ -1,19 +1,30 @@
1
  #!/bin/bash
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
 
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
  echo "Starting Cloudreve without backup/restore functionality - missing HF_TOKEN or DATASET_ID"
6
  # 直接启动 Cloudreve 作为主进程
7
  echo "Starting Cloudreve directly..."
 
 
 
 
 
 
 
8
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
9
- exit 0 # exec 通常不会返回,但加上 exit 0 以防万一
10
  fi
11
 
 
12
  # 激活 Python 虚拟环境
 
13
  echo "Activating Python venv..."
14
  source /opt/venv/bin/activate
15
 
16
  # 定义 Cloudreve 主程序目录 和 备份文件前缀
 
17
  CLOUDREVE_DIR="/opt/cloudreve"
18
  BACKUP_PREFIX="cloudreve_backup"
19
  CONFIG_FILE_PATH="/opt/cloudreve/config.ini"
@@ -21,218 +32,36 @@ DB_FILE_PATH="/opt/cloudreve/cloudreve.db"
21
  EXECUTABLE_PATH="/opt/cloudreve/cloudreve"
22
 
23
  # --- Python 函数定义 ---
24
- # (Python 函数 upload_backup 和 download_latest_backup 保持不变,这里省略以减少篇幅)
25
- # --- 请将你原始脚本中的 Python 函数 upload_backup 和 download_latest_backup 复制到这里 ---
26
- # Python 函数: 上传备份
27
- upload_backup() {
28
- file_path="$1"
29
- file_name="$2"
30
- token="$HF_TOKEN"
31
- repo_id="$DATASET_ID"
32
-
33
- echo "Preparing to upload backup file: $file_path as $file_name to Dataset: $repo_id"
34
-
35
- python3 -c "
36
- from huggingface_hub import HfApi
37
- import sys
38
- import os
39
- print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
40
- print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
41
- def manage_backups(api, repo_id_val, max_files=5):
42
- print('Managing old backups...')
43
- files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
44
- backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
45
- backup_files.sort()
46
- if len(backup_files) >= max_files:
47
- print(f'Found {len(backup_files)} backup files, maximum allowed is {max_files}.')
48
- files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
49
- for file_to_delete in files_to_delete:
50
- try:
51
- print(f'Deleting old backup: {file_to_delete}')
52
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset')
53
- print(f'Successfully deleted: {file_to_delete}')
54
- except Exception as e:
55
- print(f'Error deleting {file_to_delete}: {str(e)}')
56
- else:
57
- print('Number of backup files is within the limit.')
58
- api = HfApi(token='$token')
59
- try:
60
- repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
61
- if not repo_id_val:
62
- raise ValueError('DATASET_ID environment variable is not set.')
63
- print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
64
- api.upload_file(
65
- path_or_fileobj='$file_path',
66
- path_in_repo='$file_name',
67
- repo_id=repo_id_val,
68
- repo_type='dataset'
69
- )
70
- print(f'Successfully uploaded $file_name')
71
- manage_backups(api, repo_id_val)
72
- except Exception as e:
73
- print(f'Error uploading file: {str(e)}')
74
- sys.exit(1) # Exit if upload fails
75
- "
76
- }
77
-
78
- # Python 函数: 下载最新备份
79
- download_latest_backup() {
80
- token="$HF_TOKEN"
81
- repo_id="$DATASET_ID"
82
-
83
- echo "Preparing to download the latest backup from Dataset: $repo_id"
84
-
85
- python3 -c "
86
- from huggingface_hub import HfApi, hf_hub_download
87
- import sys
88
- import os
89
- import tarfile
90
- import tempfile
91
- import shutil
92
- import subprocess
93
-
94
- print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
95
- print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
96
-
97
- api = HfApi(token='$token')
98
- try:
99
- repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
100
- if not repo_id_val:
101
- raise ValueError('DATASET_ID environment variable is not set.')
102
-
103
- print(f'Listing files in Dataset: {repo_id_val}')
104
- files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
105
- backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
106
-
107
- if not backup_files:
108
- print('No backup files found in the Dataset. Skipping restore.')
109
- sys.exit(0) # Exit successfully if no backups to restore
110
-
111
- latest_backup = sorted(backup_files)[-1]
112
- print(f'Latest backup file found: {latest_backup}')
113
-
114
- with tempfile.TemporaryDirectory() as temp_dir:
115
- print(f'Downloading {latest_backup} to temporary directory {temp_dir}...')
116
- try:
117
- filepath = hf_hub_download(
118
- repo_id=repo_id_val,
119
- filename=latest_backup,
120
- repo_type='dataset',
121
- local_dir=temp_dir,
122
- token=os.environ.get('HF_TOKEN') # Pass token explicitly if needed
123
- )
124
- except Exception as download_error:
125
- print(f'Error during hf_hub_download: {download_error}')
126
- # Attempt to list files again for debugging
127
- try:
128
- print('Attempting to list repo files again for debugging...')
129
- files_debug = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
130
- print(f'Files found (debug): {files_debug}')
131
- except Exception as list_error:
132
- print(f'Error listing files during debug: {list_error}')
133
- sys.exit(1)
134
-
135
-
136
- if filepath and os.path.exists(filepath):
137
- print(f'Successfully downloaded backup to temporary directory: {filepath}')
138
-
139
- # Files/Dirs to restore (relative paths within CLOUDREVE_DIR)
140
- items_to_restore = ['cloudreve', 'cloudreve.db', 'config.ini']
141
-
142
- # Ensure target directory exists
143
- os.makedirs(\"$CLOUDREVE_DIR\", exist_ok=True)
144
-
145
- print('Listing contents before restore:')
146
- subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A to show hidden files
147
-
148
- # --- Safer Restore Logic ---
149
- # 1. Extract backup to a temporary location first
150
- extract_temp_dir = os.path.join(temp_dir, 'extracted_backup')
151
- os.makedirs(extract_temp_dir, exist_ok=True)
152
- print(f'Extracting backup archive: {filepath} to {extract_temp_dir}')
153
- try:
154
- with tarfile.open(filepath, 'r:gz') as tar:
155
- tar.extractall(extract_temp_dir)
156
- print('Extraction complete.')
157
- except tarfile.ReadError as tar_err:
158
- print(f'Error reading tar file: {tar_err}')
159
- sys.exit(1)
160
- except Exception as extract_err:
161
- print(f'Error during extraction: {extract_err}')
162
- sys.exit(1)
163
-
164
-
165
- # 2. Check if essential files exist in the extracted backup
166
- essential_files_present = True
167
- for item in items_to_restore:
168
- extracted_item_path = os.path.join(extract_temp_dir, item)
169
- if not os.path.exists(extracted_item_path):
170
- print(f'Error: Essential item "{item}" not found in extracted backup at {extracted_item_path}. Aborting restore.')
171
- essential_files_present = False
172
- break # Stop checking
173
-
174
- if not essential_files_present:
175
- sys.exit(1) # Abort if essential files are missing
176
-
177
- # 3. Delete existing items in the target directory
178
- print(f'Deleting existing items in $CLOUDREVE_DIR before restoring...')
179
- for item in items_to_restore:
180
- target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
181
- if os.path.exists(target_path):
182
- try:
183
- if os.path.isdir(target_path) and not os.path.islink(target_path):
184
- print(f'Deleting directory: {target_path}')
185
- shutil.rmtree(target_path)
186
- else:
187
- print(f'Deleting file/link: {target_path}')
188
- os.remove(target_path)
189
- except OSError as e:
190
- print(f'Error deleting {target_path}: {e}. Continuing...')
191
-
192
-
193
- # 4. Move extracted items to the target directory
194
- print(f'Moving extracted items from {extract_temp_dir} to $CLOUDREVE_DIR...')
195
- for item in items_to_restore:
196
- source_path = os.path.join(extract_temp_dir, item)
197
- target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
198
- try:
199
- print(f'Moving {source_path} to {target_path}')
200
- shutil.move(source_path, target_path)
201
- except Exception as move_err:
202
- print(f'Error moving {item}: {move_err}')
203
- # Decide if this is critical, maybe exit? For now, print and continue.
204
-
205
-
206
- print(f'Successfully restored backup from {latest_backup}')
207
- print('Listing contents after restore:')
208
- subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A
209
- else:
210
- print(f'Error: Downloaded file path "{filepath}" does not exist or download failed.')
211
- sys.exit(1) # Exit if download path invalid
212
-
213
- except ValueError as ve:
214
- print(f'Configuration Error: {ve}')
215
- sys.exit(1)
216
- except Exception as e:
217
- print(f'Error during backup download/restore: {str(e)}')
218
- # Print traceback for more details
219
- import traceback
220
- traceback.print_exc()
221
- sys.exit(1) # Exit on error
222
- "
223
- }
224
 
225
 
226
  # --- Sync Function ---
227
  sync_data() {
228
- echo "Background Sync Process Started"
 
 
 
 
 
 
 
 
 
 
 
 
229
  while true; do
230
- # Wait for initial Cloudreve setup potentially creating db/config if first run
231
- # Also wait if essential files are missing before attempting backup
232
- while [ ! -f "$CONFIG_FILE_PATH" ] || [ ! -f "$DB_FILE_PATH" ] || [ ! -f "$EXECUTABLE_PATH" ]; do
233
- echo "Waiting for essential Cloudreve files (config.ini, cloudreve.db, cloudreve) to exist before backup attempt..."
 
 
234
  sleep 15
235
- done
 
236
 
237
  echo "Starting sync cycle at $(date)"
238
 
@@ -267,7 +96,8 @@ sync_data() {
267
  fi
268
 
269
  # Define sync interval (use environment variable or default to 3600 seconds = 1 hour)
270
- SYNC_INTERVAL=${SYNC_INTERVAL:-3600}
 
271
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
272
  sleep $SYNC_INTERVAL
273
  done
@@ -290,21 +120,25 @@ echo "Backup restore process finished."
290
  # 2. Check if config file exists after potential restore. If not, Cloudreve needs to run once to create it.
291
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
292
  echo "Config file ($CONFIG_FILE_PATH) not found. Running Cloudreve once to generate initial config."
 
293
  /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
294
- # Cloudreve will print initial password and exit (or wait for setup if web setup enabled)
295
- # Need to check if it actually created the config...
296
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
297
  echo "CRITICAL: Cloudreve failed to create initial config file. Exiting."
298
  exit 1
299
  else
300
- echo "Initial config file created. Please check logs for admin credentials if needed."
301
- # Consider stopping here or adding a pause? For automated deployment, continue.
 
 
 
302
  fi
303
  fi
304
 
305
 
306
  # 3. Start the background sync process
307
- echo "Starting background data sync..."
 
308
  sync_data & # Run sync_data function in the background
309
  sync_pid=$! # Get PID of background sync process
310
 
 
1
  #!/bin/bash
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
+ # ... (这部分不变) ...
5
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
6
  echo "Starting Cloudreve without backup/restore functionality - missing HF_TOKEN or DATASET_ID"
7
  # 直接启动 Cloudreve 作为主进程
8
  echo "Starting Cloudreve directly..."
9
+ # --- 确保这里直接启动 Cloudreve ---
10
+ # 如果不需要 Aria2,也要确保启动了 Aria2 或注释掉下面的 Aria2 启动逻辑
11
+ # 假设无备份时仍需 Aria2:
12
+ # aria2c [OPTIONS] & # 根据需要添加 Aria2 参数
13
+ # exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
14
+
15
+ # 如果无备份时也不需要 Aria2:
16
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
17
+ exit 0
18
  fi
19
 
20
+
21
  # 激活 Python 虚拟环境
22
+ # ... (这部分不变) ...
23
  echo "Activating Python venv..."
24
  source /opt/venv/bin/activate
25
 
26
  # 定义 Cloudreve 主程序目录 和 备份文件前缀
27
+ # ... (这部分不变) ...
28
  CLOUDREVE_DIR="/opt/cloudreve"
29
  BACKUP_PREFIX="cloudreve_backup"
30
  CONFIG_FILE_PATH="/opt/cloudreve/config.ini"
 
32
  EXECUTABLE_PATH="/opt/cloudreve/cloudreve"
33
 
34
  # --- Python 函数定义 ---
35
+ # (Python 函数 upload_backup 和 download_latest_backup 保持不变)
36
+ # ... upload_backup() 函数 ...
37
+ # ... download_latest_backup() 函数 ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  # --- Sync Function ---
41
  sync_data() {
42
+ # **新增:等待 Cloudreve 启动完成**
43
+ echo "Background Sync Process: Initializing..."
44
+ echo "Waiting for Cloudreve service to become available on port 5212..."
45
+ # 循环检查端口 5212 是否在监听,nc -z 返回 0 表示成功
46
+ while ! nc -z 127.0.0.1 5212; do
47
+ echo "Cloudreve not ready yet (port 5212 not open), sleeping for 5 seconds..."
48
+ sleep 5
49
+ done
50
+ echo "Cloudreve service detected on port 5212. Starting main sync loop."
51
+ # **等待逻辑结束**
52
+
53
+ # --- 原有的 sync_data 循环逻辑开始 ---
54
+ echo "Background Sync Process Started" # 可以保留或移除此行
55
  while true; do
56
+ # **移除内部的文件检查循环**,因为 Cloudreve 已运行,文件应存在
57
+ # while [ ! -f "$CONFIG_FILE_PATH" ] || ... (这部分删除) ...
58
+
59
+ # 确保基本文件存在(作为额外的保险,但理论上此时应该存在)
60
+ if [ ! -f "$CONFIG_FILE_PATH" ] || [ ! -f "$DB_FILE_PATH" ] || [ ! -f "$EXECUTABLE_PATH" ]; then
61
+ echo "WARN: Essential Cloudreve files missing even after port check. Waiting..."
62
  sleep 15
63
+ continue # 跳过本次循环
64
+ fi
65
 
66
  echo "Starting sync cycle at $(date)"
67
 
 
96
  fi
97
 
98
  # Define sync interval (use environment variable or default to 3600 seconds = 1 hour)
99
+ # **重要:确保这里的间隔设置合理,避免过于频繁导致资源超限**
100
+ SYNC_INTERVAL=${SYNC_INTERVAL:-3600} # 默认改为 1 小时
101
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
102
  sleep $SYNC_INTERVAL
103
  done
 
120
  # 2. Check if config file exists after potential restore. If not, Cloudreve needs to run once to create it.
121
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
122
  echo "Config file ($CONFIG_FILE_PATH) not found. Running Cloudreve once to generate initial config."
123
+ # 首次运行时,不需要后台运行,让它生成配置就退出
124
  /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
125
+ # Cloudreve 在没有数据库时会初始化并打印密码然后退出
 
126
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
127
  echo "CRITICAL: Cloudreve failed to create initial config file. Exiting."
128
  exit 1
129
  else
130
+ echo "Initial config file created by first run. Please check logs for admin credentials if needed."
131
+ # 确保数据库文件也可能已创建(即使是空的)
132
+ if [ ! -f "$DB_FILE_PATH" ]; then
133
+ echo "WARN: Database file might not have been created on first run, proceeding anyway."
134
+ fi
135
  fi
136
  fi
137
 
138
 
139
  # 3. Start the background sync process
140
+ # **注意:这一步仍然在 exec Cloudreve 之前**
141
+ echo "Starting background data sync process (will wait for Cloudreve service internally)..."
142
  sync_data & # Run sync_data function in the background
143
  sync_pid=$! # Get PID of background sync process
144