flzta committed on
Commit
c61036b
·
verified ·
1 Parent(s): 21f54af

Update sync_data.sh

Browse files
Files changed (1) hide show
  1. sync_data.sh +210 -68
sync_data.sh CHANGED
@@ -2,18 +2,27 @@
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
- echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
 
 
6
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
7
- exit 0
8
  fi
9
 
10
- # 激活虚拟环境
 
11
  source /opt/venv/bin/activate
12
 
13
- # 定义 Cloudreve 主程序目录
14
  CLOUDREVE_DIR="/opt/cloudreve"
15
  BACKUP_PREFIX="cloudreve_backup"
 
 
 
16
 
 
 
 
17
  # Python 函数: 上传备份
18
  upload_backup() {
19
  file_path="$1"
@@ -49,6 +58,8 @@ def manage_backups(api, repo_id_val, max_files=5):
49
  api = HfApi(token='$token')
50
  try:
51
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
 
 
52
  print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
53
  api.upload_file(
54
  path_or_fileobj='$file_path',
@@ -60,6 +71,7 @@ try:
60
  manage_backups(api, repo_id_val)
61
  except Exception as e:
62
  print(f'Error uploading file: {str(e)}')
 
63
  "
64
  }
65
 
@@ -71,110 +83,240 @@ download_latest_backup() {
71
  echo "Preparing to download the latest backup from Dataset: $repo_id"
72
 
73
  python3 -c "
74
- from huggingface_hub import HfApi
75
  import sys
76
  import os
77
  import tarfile
78
  import tempfile
 
 
 
79
  print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
80
  print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
 
81
  api = HfApi(token='$token')
82
  try:
83
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
 
 
 
84
  print(f'Listing files in Dataset: {repo_id_val}')
85
  files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
86
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
 
87
  if not backup_files:
88
- print('No backup files found in the Dataset.')
89
- sys.exit()
 
90
  latest_backup = sorted(backup_files)[-1]
91
  print(f'Latest backup file found: {latest_backup}')
 
92
  with tempfile.TemporaryDirectory() as temp_dir:
93
- filepath = api.hf_hub_download(
94
- repo_id=repo_id_val,
95
- filename=latest_backup,
96
- repo_type='dataset',
97
- local_dir=temp_dir
98
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  if filepath and os.path.exists(filepath):
100
  print(f'Successfully downloaded backup to temporary directory: {filepath}')
101
- print(\"Before restoring backup:\")
102
- import subprocess
103
- subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
104
- # 删除现有的 Cloudreve 目录和配置文件
105
- import shutil
106
- cloudreve_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve\")
107
- cloudreve_db_path = os.path.join(\"$CLOUDREVE_DIR\", \"cloudreve.db\")
108
- config_ini_path = os.path.join(\"$CLOUDREVE_DIR\", \"config.ini\")
109
- if os.path.exists(cloudreve_path):
110
- print(f'Deleting: {cloudreve_path}')
111
- shutil.rmtree(cloudreve_path, ignore_errors=True)
112
- if os.path.exists(cloudreve_db_path):
113
- print(f'Deleting: {cloudreve_db_path}')
114
- os.remove(cloudreve_db_path)
115
- if os.path.exists(config_ini_path):
116
- print(f'Deleting: {config_ini_path}')
117
- os.remove(config_ini_path)
118
- print(\"Deletion complete.\")
119
- print(f'Extracting backup archive: {filepath} to $CLOUDREVE_DIR')
120
- import tarfile
121
- with tarfile.open(filepath, 'r:gz') as tar:
122
- tar.extractall(\"$CLOUDREVE_DIR\")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  print(f'Successfully restored backup from {latest_backup}')
124
- print(\"After restoring backup:\")
125
- subprocess.run(['ls', '-l', \"$CLOUDREVE_DIR\"], shell=True, check=False)
126
  else:
127
- print('Error during file download.')
 
 
 
 
 
128
  except Exception as e:
129
- print(f'Error downloading backup: {str(e)}')
 
 
 
 
130
  "
131
  }
132
 
133
- # 首次启动时下载最新备份
134
- echo "Downloading latest backup from HuggingFace..."
135
- download_latest_backup
136
 
137
- # 同步函数
138
  sync_data() {
139
- echo "SYNC_DATA FUNCTION IS RUNNING" # 添加了这一行
140
  while true; do
141
- echo "Starting sync process at $(date)"
 
 
 
 
 
142
 
143
- if [ -d "$CLOUDREVE_DIR" ]; then
144
- echo "Before compression:"
145
- ls -l \"$CLOUDREVE_DIR\"
146
 
147
- timestamp=$(date +%Y%m%d_%H%M%S)
148
- backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
149
- backup_path="/tmp/${backup_file}"
 
150
 
151
- echo "Compressing Cloudreve directory (including database and config) to: $backup_path"
152
- tar -czf "$backup_path" -C "$CLOUDREVE_DIR" cloudreve cloudreve.db config.ini
153
- echo "Compression complete."
154
-
155
- echo "After compression:"
156
- ls -l "$backup_path"
 
157
 
 
 
 
158
  echo "Uploading backup to HuggingFace..."
159
  upload_backup "$backup_path" "${backup_file}"
160
-
161
- rm -f "$backup_path"
 
 
 
 
 
162
  else
163
- echo "Cloudreve directory does not exist yet, waiting for next sync..."
 
164
  fi
165
 
166
- SYNC_INTERVAL=${SYNC_INTERVAL:-60} # 默认同步间隔改为 60
 
167
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
168
  sleep $SYNC_INTERVAL
169
  done
170
  }
171
 
172
- # 延迟启动同步脚本,给 Cloudreve 一些启动时间
173
- sleep 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
 
175
- # 后台启动同步进程
176
- sync_data &
 
 
 
177
 
178
- # 启动 Halo (这里需要启动 Cloudreve)
179
- echo "Starting Cloudreve..."
180
- exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
 
 
 
 
2
 
3
  # 检查 Hugging Face Token 和 Dataset ID 环境变量
4
  if [[ -z "$HF_TOKEN" ]] || [[ -z "$DATASET_ID" ]]; then
5
+ echo "Starting Cloudreve without backup/restore functionality - missing HF_TOKEN or DATASET_ID"
6
+ # 直接启动 Cloudreve 作为主进程
7
+ echo "Starting Cloudreve directly..."
8
  exec /opt/cloudreve/cloudreve -c /opt/cloudreve/config.ini
9
+ exit 0 # exec 通常不会返回,但加上 exit 0 以防万一
10
  fi
11
 
12
+ # 激活 Python 虚拟环境
13
+ echo "Activating Python venv..."
14
  source /opt/venv/bin/activate
15
 
16
+ # 定义 Cloudreve 主程序目录 和 备份文件前缀
17
  CLOUDREVE_DIR="/opt/cloudreve"
18
  BACKUP_PREFIX="cloudreve_backup"
19
+ CONFIG_FILE_PATH="/opt/cloudreve/config.ini"
20
+ DB_FILE_PATH="/opt/cloudreve/cloudreve.db"
21
+ EXECUTABLE_PATH="/opt/cloudreve/cloudreve"
22
 
23
+ # --- Python 函数定义 ---
24
+ # (Python 函数 upload_backup 和 download_latest_backup 保持不变,这里省略以减少篇幅)
25
+ # --- 请将你原始脚本中的 Python 函数 upload_backup 和 download_latest_backup 复制到这里 ---
26
  # Python 函数: 上传备份
27
  upload_backup() {
28
  file_path="$1"
 
58
  api = HfApi(token='$token')
59
  try:
60
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
61
+ if not repo_id_val:
62
+ raise ValueError('DATASET_ID environment variable is not set.')
63
  print(f'Uploading file: $file_path to {repo_id_val} as $file_name')
64
  api.upload_file(
65
  path_or_fileobj='$file_path',
 
71
  manage_backups(api, repo_id_val)
72
  except Exception as e:
73
  print(f'Error uploading file: {str(e)}')
74
+ sys.exit(1) # Exit if upload fails
75
  "
76
  }
77
 
 
83
  echo "Preparing to download the latest backup from Dataset: $repo_id"
84
 
85
  python3 -c "
86
+ from huggingface_hub import HfApi, hf_hub_download
87
  import sys
88
  import os
89
  import tarfile
90
  import tempfile
91
+ import shutil
92
+ import subprocess
93
+
94
  print(f'HF_TOKEN is set: {os.environ.get(\"HF_TOKEN\") is not None}')
95
  print(f'DATASET_ID is set: {os.environ.get(\"DATASET_ID\") is not None}')
96
+
97
  api = HfApi(token='$token')
98
  try:
99
  repo_id_val = os.environ.get('DATASET_ID') # 从环境变量中获取 repo_id
100
+ if not repo_id_val:
101
+ raise ValueError('DATASET_ID environment variable is not set.')
102
+
103
  print(f'Listing files in Dataset: {repo_id_val}')
104
  files = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
105
  backup_files = [f for f in files if f.startswith('$BACKUP_PREFIX') and f.endswith('.tar.gz')]
106
+
107
  if not backup_files:
108
+ print('No backup files found in the Dataset. Skipping restore.')
109
+ sys.exit(0) # Exit successfully if no backups to restore
110
+
111
  latest_backup = sorted(backup_files)[-1]
112
  print(f'Latest backup file found: {latest_backup}')
113
+
114
  with tempfile.TemporaryDirectory() as temp_dir:
115
+ print(f'Downloading {latest_backup} to temporary directory {temp_dir}...')
116
+ try:
117
+ filepath = hf_hub_download(
118
+ repo_id=repo_id_val,
119
+ filename=latest_backup,
120
+ repo_type='dataset',
121
+ local_dir=temp_dir,
122
+ token=os.environ.get('HF_TOKEN') # Pass token explicitly if needed
123
+ )
124
+ except Exception as download_error:
125
+ print(f'Error during hf_hub_download: {download_error}')
126
+ # Attempt to list files again for debugging
127
+ try:
128
+ print('Attempting to list repo files again for debugging...')
129
+ files_debug = api.list_repo_files(repo_id=repo_id_val, repo_type='dataset')
130
+ print(f'Files found (debug): {files_debug}')
131
+ except Exception as list_error:
132
+ print(f'Error listing files during debug: {list_error}')
133
+ sys.exit(1)
134
+
135
+
136
  if filepath and os.path.exists(filepath):
137
  print(f'Successfully downloaded backup to temporary directory: {filepath}')
138
+
139
+ # Files/Dirs to restore (relative paths within CLOUDREVE_DIR)
140
+ items_to_restore = ['cloudreve', 'cloudreve.db', 'config.ini']
141
+
142
+ # Ensure target directory exists
143
+ os.makedirs(\"$CLOUDREVE_DIR\", exist_ok=True)
144
+
145
+ print('Listing contents before restore:')
146
+ subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A to show hidden files
147
+
148
+ # --- Safer Restore Logic ---
149
+ # 1. Extract backup to a temporary location first
150
+ extract_temp_dir = os.path.join(temp_dir, 'extracted_backup')
151
+ os.makedirs(extract_temp_dir, exist_ok=True)
152
+ print(f'Extracting backup archive: {filepath} to {extract_temp_dir}')
153
+ try:
154
+ with tarfile.open(filepath, 'r:gz') as tar:
155
+ tar.extractall(extract_temp_dir)
156
+ print('Extraction complete.')
157
+ except tarfile.ReadError as tar_err:
158
+ print(f'Error reading tar file: {tar_err}')
159
+ sys.exit(1)
160
+ except Exception as extract_err:
161
+ print(f'Error during extraction: {extract_err}')
162
+ sys.exit(1)
163
+
164
+
165
+ # 2. Check if essential files exist in the extracted backup
166
+ essential_files_present = True
167
+ for item in items_to_restore:
168
+ extracted_item_path = os.path.join(extract_temp_dir, item)
169
+ if not os.path.exists(extracted_item_path):
170
+ print(f'Error: Essential item "{item}" not found in extracted backup at {extracted_item_path}. Aborting restore.')
171
+ essential_files_present = False
172
+ break # Stop checking
173
+
174
+ if not essential_files_present:
175
+ sys.exit(1) # Abort if essential files are missing
176
+
177
+ # 3. Delete existing items in the target directory
178
+ print(f'Deleting existing items in $CLOUDREVE_DIR before restoring...')
179
+ for item in items_to_restore:
180
+ target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
181
+ if os.path.exists(target_path):
182
+ try:
183
+ if os.path.isdir(target_path) and not os.path.islink(target_path):
184
+ print(f'Deleting directory: {target_path}')
185
+ shutil.rmtree(target_path)
186
+ else:
187
+ print(f'Deleting file/link: {target_path}')
188
+ os.remove(target_path)
189
+ except OSError as e:
190
+ print(f'Error deleting {target_path}: {e}. Continuing...')
191
+
192
+
193
+ # 4. Move extracted items to the target directory
194
+ print(f'Moving extracted items from {extract_temp_dir} to $CLOUDREVE_DIR...')
195
+ for item in items_to_restore:
196
+ source_path = os.path.join(extract_temp_dir, item)
197
+ target_path = os.path.join(\"$CLOUDREVE_DIR\", item)
198
+ try:
199
+ print(f'Moving {source_path} to {target_path}')
200
+ shutil.move(source_path, target_path)
201
+ except Exception as move_err:
202
+ print(f'Error moving {item}: {move_err}')
203
+ # Decide if this is critical, maybe exit? For now, print and continue.
204
+
205
+
206
  print(f'Successfully restored backup from {latest_backup}')
207
+ print('Listing contents after restore:')
208
+ subprocess.run(['ls', '-lA', \"$CLOUDREVE_DIR\"], check=False) # Use -A
209
  else:
210
+ print(f'Error: Downloaded file path "{filepath}" does not exist or download failed.')
211
+ sys.exit(1) # Exit if download path invalid
212
+
213
+ except ValueError as ve:
214
+ print(f'Configuration Error: {ve}')
215
+ sys.exit(1)
216
  except Exception as e:
217
+ print(f'Error during backup download/restore: {str(e)}')
218
+ # Print traceback for more details
219
+ import traceback
220
+ traceback.print_exc()
221
+ sys.exit(1) # Exit on error
222
  "
223
  }
224
 
 
 
 
225
 
226
+ # --- Sync Function ---
227
  sync_data() {
228
+ echo "Background Sync Process Started"
229
  while true; do
230
+ # Wait for initial Cloudreve setup potentially creating db/config if first run
231
+ # Also wait if essential files are missing before attempting backup
232
+ while [ ! -f "$CONFIG_FILE_PATH" ] || [ ! -f "$DB_FILE_PATH" ] || [ ! -f "$EXECUTABLE_PATH" ]; do
233
+ echo "Waiting for essential Cloudreve files (config.ini, cloudreve.db, cloudreve) to exist before backup attempt..."
234
+ sleep 15
235
+ done
236
 
237
+ echo "Starting sync cycle at $(date)"
 
 
238
 
239
+ # Define backup path and name
240
+ timestamp=$(date +%Y%m%d_%H%M%S)
241
+ backup_file="${BACKUP_PREFIX}_${timestamp}.tar.gz"
242
+ backup_path="/tmp/${backup_file}" # Use /tmp for temporary files
243
 
244
+ echo "Compressing Cloudreve data (executable, db, config) to: $backup_path"
245
+ # Use -C to change directory, ensuring archive paths are relative
246
+ # Only include the executable, db, and config file
247
+ tar -czf "$backup_path" -C "$CLOUDREVE_DIR" \
248
+ $(basename "$EXECUTABLE_PATH") \
249
+ $(basename "$DB_FILE_PATH") \
250
+ $(basename "$CONFIG_FILE_PATH")
251
 
252
+ # Check if compression was successful (file exists and is not empty)
253
+ if [ -s "$backup_path" ]; then
254
+ echo "Compression complete. File size: $(ls -lh "$backup_path" | awk '{print $5}')"
255
  echo "Uploading backup to HuggingFace..."
256
  upload_backup "$backup_path" "${backup_file}"
257
+ # Check exit status of upload_backup? The python script should exit non-zero on failure.
258
+ if [ $? -ne 0 ]; then
259
+ echo "Backup upload failed. Keeping local archive: $backup_path"
260
+ else
261
+ echo "Upload successful. Removing local archive."
262
+ rm -f "$backup_path"
263
+ fi
264
  else
265
+ echo "Compression failed or created an empty file. Skipping upload."
266
+ rm -f "$backup_path" # Remove potentially empty/corrupt file
267
  fi
268
 
269
+ # Define sync interval (use environment variable or default to 3600 seconds = 1 hour)
270
+ SYNC_INTERVAL=${SYNC_INTERVAL:-3600}
271
  echo "Next sync in ${SYNC_INTERVAL} seconds..."
272
  sleep $SYNC_INTERVAL
273
  done
274
  }
275
 
276
+ # --- Main Execution ---
277
+
278
+ # 1. Attempt to restore from the latest backup on startup
279
+ echo "Attempting to restore latest backup from HuggingFace..."
280
+ download_latest_backup
281
+ # Check exit code? If restore fails critically, maybe don't start?
282
+ # The python script now exits non-zero on critical errors.
283
+ if [ $? -ne 0 ]; then
284
+ echo "CRITICAL: Backup restoration failed. Exiting."
285
+ exit 1
286
+ fi
287
+ echo "Backup restore process finished."
288
+
289
+
290
+ # 2. Check if config file exists after potential restore. If not, Cloudreve needs to run once to create it.
291
+ if [ ! -f "$CONFIG_FILE_PATH" ]; then
292
+ echo "Config file ($CONFIG_FILE_PATH) not found. Running Cloudreve once to generate initial config."
293
+ /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
294
+ # Cloudreve will print initial password and exit (or wait for setup if web setup enabled)
295
+ # Need to check if it actually created the config...
296
+ if [ ! -f "$CONFIG_FILE_PATH" ]; then
297
+ echo "CRITICAL: Cloudreve failed to create initial config file. Exiting."
298
+ exit 1
299
+ else
300
+ echo "Initial config file created. Please check logs for admin credentials if needed."
301
+ # Consider stopping here or adding a pause? For automated deployment, continue.
302
+ fi
303
+ fi
304
+
305
+
306
+ # 3. Start the background sync process
307
+ echo "Starting background data sync..."
308
+ sync_data & # Run sync_data function in the background
309
+ sync_pid=$! # Get PID of background sync process
310
 
311
+ # 4. Start Cloudreve in the foreground using exec
312
+ # 'exec' replaces the current shell process with the Cloudreve process.
313
+ # This makes Cloudreve the main process of the container.
314
+ echo "Starting Cloudreve application as the main process..."
315
+ exec /opt/cloudreve/cloudreve -c "$CONFIG_FILE_PATH"
316
 
317
+ # If exec fails, the script continues here.
318
+ exec_failed_code=$?
319
+ echo "CRITICAL: Failed to execute Cloudreve. Exit code: $exec_failed_code"
320
+ # Attempt to kill the background sync process if exec failed
321
+ kill $sync_pid 2>/dev/null
322
+ exit $exec_failed_code