flzta committed on
Commit
37c78bc
·
verified ·
1 Parent(s): f34c42c

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +47 -213
sync_data.sh CHANGED
@@ -1,19 +1,30 @@
1
  #!/bin/bash
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
 
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
  echo "Starting Cloudreve without backup/restore functionality - missing HF_TOKEN or DATASET_ID"
6
  # 直接启动 Cloudreve 作为主进程
7
  echo "Starting Cloudreve directly..."
 
 
 
 
 
 
 
8
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
9
- exit 0 # exec 通常不会返回,但加上 exit 0 以防万一
10
  fi
11
 
 
12
  # 激活 Python 虚拟环境
 
13
  echo "Activating Python venv..."
14
  source /opt/venv/bin/activate
15
 
16
  # 定义 Cloudreve 主程序目录 和 备份文件前缀
 
17
  CLOUDREVE_DIR="/opt/cloudreve"
18
  BACKUP_PREFIX="cloudreve_backup"
19
  CONFIG_FILE_PATH="/opt/cloudreve/config.ini"
@@ -21,218 +32,36 @@ DB_FILE_PATH="/opt/cloudreve/cloudreve.db"
21
  EXECUTABLE_PATH="/opt/cloudreve/cloudreve"
22
 
23
  # --- Python 函数定义 ---
24
- # (Python 函数 upload_backup 和 download_latest_backup 保持不变,这里省略以减少篇幅)
25
- # --- 请将你原始脚本中的 Python 函数 upload_backup 和 download_latest_backup 复制到这里 ---
26
- # Python 函数: 上传备份
27
- upload_backup() {
28
- file_path="$1"
29
- file_name="$2"
30
- token="$HF_TOKEN"
31
- repo_id="$DATASET_ID"
32
-
33
- echo "Preparing to upload backup file: $file_path as $file_name to Dataset: $repo_id"
34
-
35
- python3 -c "
36
- from huggingface_hub import HfApi
37
- import sys
38
- import os
39
- print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
40
- print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
41
- def manage_backups(api, repo_id_val, max_files=5):
42
- print('Managing old backups...')
43
- files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
44
- backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
45
- backup_files.sort()
46
- if len(backup_files) >= max_files:
47
- print(f'Found {len(backup_files)} backup files, maximum allowed is {max_files}.')
48
- files_to_delete = backup_files[:(len(backup_files) - max_files + 1)]
49
- for file_to_delete in files_to_delete:
50
- try:
51
- print(f'Deleting old backup: {file_to_delete}')
52
- api.delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset')
53
- print(f'Successfully deleted: {file_to_delete}')
54
- except Exception as e:
55
- print(f'Error deleting {file_to_delete}: {str(e)}')
56
- else:
57
- print('Number of backup files is within the limit.')
58
- api = HfApi(token='$token')
59
- try:
60
- repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
61
- if not repo_id_val:
62
- raise ValueError('DATASET_ID environment variable is not set.')
63
- print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
64
- api.upload_file(
65
- path_or_fileobj='$file_path',
66
- path_in_repo='$file_name',
67
- repo_id=repo_id_val,
68
- repo_type='dataset'
69
- )
70
- print(f'Successfully uploaded $file_name')
71
- manage_backups(api, repo_id_val)
72
- except Exception as e:
73
- print(f'Error uploading file: {str(e)}')
74
- sys.exit(1) # Exit if upload fails
75
- "
76
- }
77
-
78
- # Python 函数: 下载最新备份
79
- download_latest_backup() {
80
- token="$HF_TOKEN"
81
- repo_id="$DATASET_ID"
82
-
83
- echo "Preparing to download the latest backup from Dataset: $repo_id"
84
-
85
- python3 -c "
86
- from huggingface_hub import HfApi, hf_hub_download
87
- import sys
88
- import os
89
- import tarfile
90
- import tempfile
91
- import shutil
92
- import subprocess
93
-
94
- print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
95
- print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
96
-
97
- api = HfApi(token='$token')
98
- try:
99
- repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
100
- if not repo_id_val:
101
- raise ValueError('DATASET_ID environment variable is not set.')
102
-
103
- print(f'Listing files in Dataset: {repo_id_val}')
104
- files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
105
- backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
106
-
107
- if not backup_files:
108
- print('No backup files found in the Dataset. Skipping restore.')
109
- sys.exit(0) # Exit successfully if no backups to restore
110
-
111
- latest_backup = sorted(backup_files)[-1]
112
- print(f'Latest backup file found: {latest_backup}')
113
-
114
- with tempfile.TemporaryDirectory() as temp_dir:
115
- print(f'Downloading {latest_backup} to temporary directory {temp_dir}...')
116
- try:
117
- filepath = hf_hub_download(
118
- repo_id=repo_id_val,
119
- filename=latest_backup,
120
- repo_type='dataset',
121
- local_dir=temp_dir,
122
- token=os.environ.get('HF_TOKEN') # Pass token explicitly if needed
123
- )
124
- except Exception as download_error:
125
- print(f'Error during hf_hub_download: {download_error}')
126
- # Attempt to list files again for debugging
127
- try:
128
- print('Attempting to list repo files again for debugging...')
129
- files_debug = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
130
- print(f'Files found (debug): {files_debug}')
131
- except Exception as list_error:
132
- print(f'Error listing files during debug: {list_error}')
133
- sys.exit(1)
134
-
135
-
136
- if filepath and os.path.exists(filepath):
137
- print(f'Successfully downloaded backup to temporary directory: {filepath}')
138
-
139
- # Files/Dirs to restore (relative paths within CLOUDREVE_DIR)
140
- items_to_restore = ['cloudreve', 'cloudreve.db', 'config.ini']
141
-
142
- # Ensure target directory exists
143
- os.makedirs(\"$CLOUDREVE_DIR\", exist_ok=True)
144
-
145
- print('Listing contents before restore:')
146
- subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A to show hidden files
147
-
148
- # --- Safer Restore Logic ---
149
- # 1. Extract backup to a temporary location first
150
- extract_temp_dir = os.path.join(temp_dir, 'extracted_backup')
151
- os.makedirs(extract_temp_dir, exist_ok=True)
152
- print(f'Extracting backup archive: {filepath} to {extract_temp_dir}')
153
- try:
154
- with tarfile.open(filepath, 'r:gz') as tar:
155
- tar.extractall(extract_temp_dir)
156
- print('Extraction complete.')
157
- except tarfile.ReadError as tar_err:
158
- print(f'Error reading tar file: {tar_err}')
159
- sys.exit(1)
160
- except Exception as extract_err:
161
- print(f'Error during extraction: {extract_err}')
162
- sys.exit(1)
163
-
164
-
165
- # 2. Check if essential files exist in the extracted backup
166
- essential_files_present = True
167
- for item in items_to_restore:
168
- extracted_item_path = os.path.join(extract_temp_dir, item)
169
- if not os.path.exists(extracted_item_path):
170
- print(f'Error: Essential item "{item}" not found in extracted backup at {extracted_item_path}. Aborting restore.')
171
- essential_files_present = False
172
- break # Stop checking
173
-
174
- if not essential_files_present:
175
- sys.exit(1) # Abort if essential files are missing
176
-
177
- # 3. Delete existing items in the target directory
178
- print(f'Deleting existing items in $CLOUDREVE_DIR before restoring...')
179
- for item in items_to_restore:
180
- target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
181
- if os.path.exists(target_path):
182
- try:
183
- if os.path.isdir(target_path) and not os.path.islink(target_path):
184
- print(f'Deleting directory: {target_path}')
185
- shutil.rmtree(target_path)
186
- else:
187
- print(f'Deleting file/link: {target_path}')
188
- os.remove(target_path)
189
- except OSError as e:
190
- print(f'Error deleting {target_path}: {e}. Continuing...')
191
-
192
-
193
- # 4. Move extracted items to the target directory
194
- print(f'Moving extracted items from {extract_temp_dir} to $CLOUDREVE_DIR...')
195
- for item in items_to_restore:
196
- source_path = os.path.join(extract_temp_dir, item)
197
- target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
198
- try:
199
- print(f'Moving {source_path} to {target_path}')
200
- shutil.move(source_path, target_path)
201
- except Exception as move_err:
202
- print(f'Error moving {item}: {move_err}')
203
- # Decide if this is critical, maybe exit? For now, print and continue.
204
-
205
-
206
- print(f'Successfully restored backup from {latest_backup}')
207
- print('Listing contents after restore:')
208
- subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A
209
- else:
210
- print(f'Error: Downloaded file path "{filepath}" does not exist or download failed.')
211
- sys.exit(1) # Exit if download path invalid
212
-
213
- except ValueError as ve:
214
- print(f'Configuration Error: {ve}')
215
- sys.exit(1)
216
- except Exception as e:
217
- print(f'Error during backup download/restore: {str(e)}')
218
- # Print traceback for more details
219
- import traceback
220
- traceback.print_exc()
221
- sys.exit(1) # Exit on error
222
- "
223
- }
224
 
225
 
226
  # --- Sync Function ---
227
  sync_data() {
228
- echo "Background Sync Process Started"
 
 
 
 
 
 
 
 
 
 
 
 
229
  while true; do
230
- # Wait for initial Cloudreve setup potentially creating db/config if first run
231
- # Also wait if essential files are missing before attempting backup
232
- while [ ! -f "$CONFIG_FILE_PATH" ] || [ ! -f "$DB_FILE_PATH" ] || [ ! -f "$EXECUTABLE_PATH" ]; do
233
- echo "Waiting for essential Cloudreve files (config.ini, cloudreve.db, cloudreve) to exist before backup attempt..."
 
 
234
  sleep 15
235
- done
 
236
 
237
  echo "Starting sync cycle at $(date)"
238
 
@@ -267,7 +96,8 @@ sync_data() {
267
  fi
268
 
269
  # Define sync interval (use environment variable or default to 3600 seconds = 1 hour)
270
- SYNC_INTERVAL=${SYNC_INTERVAL:-3600}
 
271
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
272
  sleep $SYNC_INTERVAL
273
  done
@@ -290,21 +120,25 @@ echo "Backup restore process finished."
290
  # 2. Check if config file exists after potential restore. If not, Cloudreve needs to run once to create it.
291
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
292
  echo "Config file ($CONFIG_FILE_PATH) not found. Running Cloudreve once to generate initial config."
 
293
  /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
294
- # Cloudreve will print initial password and exit (or wait for setup if web setup enabled)
295
- # Need to check if it actually created the config...
296
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
297
  echo "CRITICAL: Cloudreve failed to create initial config file. Exiting."
298
  exit 1
299
  else
300
- echo "Initial config file created. Please check logs for admin credentials if needed."
301
- # Consider stopping here or adding a pause? For automated deployment, continue.
 
 
 
302
  fi
303
  fi
304
 
305
 
306
  # 3. Start the background sync process
307
- echo "Starting background data sync..."
 
308
  sync_data & # Run sync_data function in the background
309
  sync_pid=$! # Get PID of background sync process
310
 
 
1
  #!/bin/bash
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
+ # ... (这部分不变) ...
5
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
6
  echo "Starting Cloudreve without backup/restore functionality - missing HF_TOKEN or DATASET_ID"
7
  # 直接启动 Cloudreve 作为主进程
8
  echo "Starting Cloudreve directly..."
9
+ # --- 确保这里直接启动 Cloudreve ---
10
+ # 如果不需要 Aria2,也要确保启动了 Aria2 或注释掉下面的 Aria2 启动逻辑
11
+ # 假设无备份时仍需 Aria2:
12
+ # aria2c [OPTIONS] & # 根据需要添加 Aria2 参数
13
+ # exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
14
+
15
+ # 如果无备份时也不需要 Aria2:
16
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
17
+ exit 0
18
  fi
19
 
20
+
21
  # 激活 Python 虚拟环境
22
+ # ... (这部分不变) ...
23
  echo "Activating Python venv..."
24
  source /opt/venv/bin/activate
25
 
26
  # 定义 Cloudreve 主程序目录 和 备份文件前缀
27
+ # ... (这部分不变) ...
28
  CLOUDREVE_DIR="/opt/cloudreve"
29
  BACKUP_PREFIX="cloudreve_backup"
30
  CONFIG_FILE_PATH="/opt/cloudreve/config.ini"
 
32
  EXECUTABLE_PATH="/opt/cloudreve/cloudreve"
33
 
34
  # --- Python 函数定义 ---
35
+ # (Python 函数 upload_backup 和 download_latest_backup 保持不变)
36
+ # ... upload_backup() 函数 ...
37
+ # ... download_latest_backup() 函数 ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  # --- Sync Function ---
41
  sync_data() {
42
+ # **新增:等待 Cloudreve 启动完成**
43
+ echo "Background Sync Process: Initializing..."
44
+ echo "Waiting for Cloudreve service to become available on port 5212..."
45
+ # 循环检查端口 5212 是否在监听,nc -z 返回 0 表示成功
46
+ while ! nc -z 127.0.0.1 5212; do
47
+ echo "Cloudreve not ready yet (port 5212 not open), sleeping for 5 seconds..."
48
+ sleep 5
49
+ done
50
+ echo "Cloudreve service detected on port 5212. Starting main sync loop."
51
+ # **等待逻辑结束**
52
+
53
+ # --- 原有的 sync_data 循环逻辑开始 ---
54
+ echo "Background Sync Process Started" # 可以保留或移除此行
55
  while true; do
56
+ # **移除内部的文件检查循环**,因为 Cloudreve 已运行,文件应存在
57
+ # while [ ! -f "$CONFIG_FILE_PATH" ] || ... (这部分删除) ...
58
+
59
+ # 确保基本文件存在(作为额外的保险,但理论上此时应该存在)
60
+ if [ ! -f "$CONFIG_FILE_PATH" ] || [ ! -f "$DB_FILE_PATH" ] || [ ! -f "$EXECUTABLE_PATH" ]; then
61
+ echo "WARN: Essential Cloudreve files missing even after port check. Waiting..."
62
  sleep 15
63
+ continue # 跳过本次循环
64
+ fi
65
 
66
  echo "Starting sync cycle at $(date)"
67
 
 
96
  fi
97
 
98
  # Define sync interval (use environment variable or default to 3600 seconds = 1 hour)
99
+ # **重要:确保这里的间隔设置合理,避免过于频繁导致资源超限**
100
+ SYNC_INTERVAL=${SYNC_INTERVAL:-3600} # 默认改为 1 小时
101
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
102
  sleep $SYNC_INTERVAL
103
  done
 
120
  # 2. Check if config file exists after potential restore. If not, Cloudreve needs to run once to create it.
121
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
122
  echo "Config file ($CONFIG_FILE_PATH) not found. Running Cloudreve once to generate initial config."
123
+ # 首次运行时,不需要后台运行,让它生成配置就退出
124
  /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
125
+ # Cloudreve 在没有数据库时会初始化并打印密码然后退出
 
126
  if [ ! -f "$CONFIG_FILE_PATH" ]; then
127
  echo "CRITICAL: Cloudreve failed to create initial config file. Exiting."
128
  exit 1
129
  else
130
+ echo "Initial config file created by first run. Please check logs for admin credentials if needed."
131
+ # 确保数据库文件也可能已创建(即使是空的)
132
+ if [ ! -f "$DB_FILE_PATH" ]; then
133
+ echo "WARN: Database file might not have been created on first run, proceeding anyway."
134
+ fi
135
  fi
136
  fi
137
 
138
 
139
  # 3. Start the background sync process
140
+ # **注意:这一步仍然在 exec Cloudreve 之前**
141
+ echo "Starting background data sync process (will wait for Cloudreve service internally)..."
142
  sync_data & # Run sync_data function in the background
143
  sync_pid=$! # Get PID of background sync process
144