Spaces:

flzta
/

datas

Paused

App Files Community

datas / sync_data.sh

flzta

Update sync_data.sh

6fa1e07 verified 9 months ago

raw

history blame contribute delete

15.4 kB

	#!/bin/bash

	# --- Helper Functions ---

	# Block until the Cloudreve TCP port accepts a connection.
	# Arguments: $1 - host to probe (optional, default 127.0.0.1)
	#            $2 - TCP port to probe (optional, default 5212)
	# Outputs:   progress messages on stdout
	# Returns:   0 once a probe succeeds. There is no timeout: the function
	#            polls every 5 seconds until the port opens.
	wait_for_cloudreve() {
		local host="${1:-127.0.0.1}"
		local port="${2:-5212}"

		echo "[Backup Manager] Waiting for Cloudreve service (port ${port})..."
		until _cloudreve_port_open "$host" "$port"; do
			echo "[Backup Manager] Cloudreve not ready yet, sleeping 5s..."
			sleep 5
		done
		echo "[Backup Manager] Cloudreve service detected."
	}

	# Probe host ($1) / port ($2) once; returns 0 when a TCP connection succeeds.
	# Uses nc when it is available. Otherwise falls back to bash's /dev/tcp
	# redirection so a missing nc binary does not look like "service not ready".
	_cloudreve_port_open() {
		if command -v nc >/dev/null 2>&1; then
			nc -z "$1" "$2"
		else
			# The subshell closes fd 3 again as soon as the probe finishes.
			(exec 3<>"/dev/tcp/$1/$2") 2>/dev/null
		fi
	}

	# Create one compressed archive of the Cloudreve executable, database and
	# config file under /tmp and pass it to upload_backup.
	# Arguments: $1 - path to the config file
	#            $2 - path to the database file
	#            $3 - path to the executable
	#            $4 - directory that contains those three files (they are
	#                 archived by basename relative to this directory)
	#            $5 - archive name prefix; the archive is named
	#                 <prefix>_<YYYYmmdd_HHMMSS>.tar.gz
	# Outputs:   progress messages on stdout
	# Returns:   0 when the upload succeeded (the local archive is removed);
	#            1 when a source file is missing, tar fails, the archive is
	#            empty, or the upload fails (the archive is then kept in /tmp).
	perform_backup_once() {
		echo "[Backup Action] Performing single backup..."
		local config_path="$1"
		local db_path="$2"
		local exe_path="$3"
		local cloudreve_dir="$4"
		local backup_prefix="$5"

		# Wait brief moment to ensure files are stable after service start
		sleep 5

		if [ ! -f "$config_path" ] || [ ! -f "$db_path" ] || [ ! -f "$exe_path" ]; then
			echo "[Backup Action] WARN: Essential files missing for backup ($config_path, $db_path, $exe_path). Skipping."
			return 1
		fi

		# Declared separately so the assignment does not hide date's exit status.
		local timestamp
		timestamp=$(date +%Y%m%d_%H%M%S)
		local backup_file="${backup_prefix}_${timestamp}.tar.gz"
		local backup_path="/tmp/${backup_file}"

		echo "[Backup Action] Compressing data (exec, db, config) to: $backup_path"
		if ! tar -czf "$backup_path" -C "$cloudreve_dir" \
			"$(basename "$exe_path")" \
			"$(basename "$db_path")" \
			"$(basename "$config_path")"; then
			echo "[Backup Action] ERROR: tar command failed."
			rm -f "$backup_path"
			return 1
		fi

		if [ ! -s "$backup_path" ]; then
			echo "[Backup Action] ERROR: Compression failed or created empty file. Skipping upload."
			rm -f "$backup_path"
			return 1
		fi

		echo "[Backup Action] Compression complete. Size: $(ls -lh "$backup_path" | awk '{print $5}')"
		echo "[Backup Action] Uploading backup to HuggingFace..."
		if ! upload_backup "$backup_path" "${backup_file}"; then
			echo "[Backup Action] ERROR: Backup upload failed. Keeping local archive: $backup_path"
			return 1
		fi

		echo "[Backup Action] Upload successful. Removing local archive."
		rm -f "$backup_path"
		return 0
	}

	# --- Python-backed Functions (shell wrappers around embedded Python scripts) ---

	# Upload one backup archive to the Hugging Face dataset repository, then
	# prune the oldest archives that share the backup prefix.
	# Globals:   HF_TOKEN, DATASET_ID - read by the embedded Python script from
	#            the process environment, so they must be exported
	#            BACKUP_PREFIX - read here and passed to Python as an argument
	# Arguments: $1 - local path of the archive to upload
	#            $2 - file name to use inside the repository
	# Outputs:   progress messages on stdout
	# Returns:   the Python exit status: 0 on successful upload; 1 when
	#            DATASET_ID is missing, the archive does not exist, or the
	#            upload raises. Pruning errors are logged but do not fail.
	upload_backup() {
		local file_path="$1"
		local file_name="$2"
		local repo_id="$DATASET_ID"
		echo "[Python Upload] Preparing: $file_path as $file_name to $repo_id"

		# The script is fed on stdin from a quoted heredoc, so the shell expands
		# nothing inside it: values arrive via argv/environment instead of being
		# pasted into Python source, and the token never appears in the command
		# line. "<<-" strips leading TABs only; keep the Python indentation in
		# spaces and the shell indentation in tabs.
		python3 - "$file_path" "$file_name" "$BACKUP_PREFIX" <<-'PYTHON_UPLOAD'
		import os
		import sys
		import time

		from huggingface_hub import delete_file, list_repo_files, upload_file

		file_path_val, file_name_val, backup_prefix_val = sys.argv[1:4]
		token_val = os.environ.get('HF_TOKEN')
		repo_id_val = os.environ.get('DATASET_ID')

		print(f'[Python Upload] HF_TOKEN is set: {token_val is not None}')
		print(f'[Python Upload] DATASET_ID is set: {repo_id_val is not None}')


		def list_files_with_retry(attempts=3, delay=5):
		    """List repository files, retrying because the API call can fail transiently."""
		    for attempt in range(attempts):
		        try:
		            return list_repo_files(repo_id=repo_id_val, repo_type='dataset', token=token_val)
		        except Exception as list_err:
		            print(f'[Python Upload] Error listing files (attempt {attempt + 1}/{attempts}): {list_err}')
		            if attempt == attempts - 1:
		                raise
		            time.sleep(delay)


		def manage_backups(max_files=50):
		    """Delete the oldest archives once max_files is reached.

		    Archive names embed a sortable timestamp, so a name sort is
		    oldest-first. This runs after the new archive is uploaded, so once
		    the limit is hit max_files - 1 archives remain.
		    """
		    print('[Python Upload] Managing old backups...')
		    try:
		        files = list_files_with_retry()
		        backup_files = sorted(
		            f for f in files
		            if f.startswith(backup_prefix_val) and f.endswith('.tar.gz')
		        )
		        print(f'[Python Upload] Found {len(backup_files)} existing backup files.')
		        if len(backup_files) < max_files:
		            print(f'[Python Upload] Number of backups ({len(backup_files)}) is within the limit ({max_files}).')
		            return
		        num_to_delete = len(backup_files) - max_files + 1
		        print(f'[Python Upload] Max backups ({max_files}) reached. Need to delete {num_to_delete} oldest backups.')
		        for file_to_delete in backup_files[:num_to_delete]:
		            try:
		                print(f'[Python Upload] Deleting old backup: {file_to_delete}')
		                delete_file(path_in_repo=file_to_delete, repo_id=repo_id_val, repo_type='dataset', token=token_val)
		                print(f'[Python Upload] Successfully deleted: {file_to_delete}')
		            except Exception as e:
		                # One failed deletion must not stop the remaining ones.
		                print(f'[Python Upload] ERROR deleting {file_to_delete}: {str(e)}')
		    except Exception as e:
		        print(f'[Python Upload] ERROR during backup management: {e}')


		if not repo_id_val:
		    print('[Python Upload] ERROR: DATASET_ID missing.')
		    sys.exit(1)
		if not os.path.exists(file_path_val):
		    print(f'[Python Upload] ERROR: Backup file {file_path_val} not found.')
		    sys.exit(1)

		try:
		    print(f'[Python Upload] Uploading: {file_path_val} to {repo_id_val} as {file_name_val}')
		    upload_file(
		        path_or_fileobj=file_path_val,
		        path_in_repo=file_name_val,
		        repo_id=repo_id_val,
		        repo_type='dataset',
		        token=token_val,
		    )
		    print(f'[Python Upload] Successfully uploaded {file_name_val}')
		except Exception as e:
		    print(f'[Python Upload] ERROR uploading file: {str(e)}')
		    sys.exit(1)

		manage_backups()
		PYTHON_UPLOAD
		return $? # Return python exit code
	}

	# Download the newest backup archive from the Hugging Face dataset
	# repository and restore it into the Cloudreve directory.
	# Globals:   HF_TOKEN, DATASET_ID - read by the embedded Python script from
	#            the process environment, so they must be exported
	#            CLOUDREVE_DIR - restore target, read from the environment by
	#            Python (falls back to /opt/cloudreve when not set there)
	#            BACKUP_PREFIX - read here and passed to Python as an argument
	# Outputs:   progress messages on stdout; creates
	#            /tmp/backup_restored.flag only after a complete restore
	# Returns:   the Python exit status: 0 when a backup was restored or when no
	#            backup exists; 1 on any listing, download, extraction or
	#            restore error.
	download_latest_backup() {
		local repo_id="$DATASET_ID"
		local download_flag_file="/tmp/backup_restored.flag" # Flag file
		rm -f "$download_flag_file" # Ensure flag is removed initially
		echo "[Python Restore] Preparing to download from Dataset: $repo_id"

		# The script is fed on stdin from a quoted heredoc, so the shell expands
		# nothing inside it: values arrive via argv/environment instead of being
		# pasted into Python source, and the token never appears in the command
		# line. "<<-" strips leading TABs only; keep the Python indentation in
		# spaces and the shell indentation in tabs.
		python3 - "$download_flag_file" "$BACKUP_PREFIX" <<-'PYTHON_RESTORE'
		import os
		import shutil
		import subprocess
		import sys
		import tarfile
		import tempfile
		import time
		import traceback

		from huggingface_hub import hf_hub_download, list_repo_files

		# Files that make up one backup; all three must be present to restore.
		ITEMS_TO_RESTORE = ['cloudreve', 'cloudreve.db', 'config.ini']

		flag_file, backup_prefix_val = sys.argv[1:3]
		token_val = os.environ.get('HF_TOKEN')
		repo_id_val = os.environ.get('DATASET_ID')


		def log(message):
		    # Flush so our lines stay ordered relative to the ls subprocess output.
		    print(f'[Python Restore] {message}', flush=True)


		def fail(message):
		    log(message)
		    sys.exit(1)


		def list_files_with_retry(attempts=3, delay=5):
		    """List repository files, retrying because the API call can fail transiently."""
		    for attempt in range(attempts):
		        try:
		            return list_repo_files(repo_id=repo_id_val, repo_type='dataset', token=token_val)
		        except Exception as list_err:
		            log(f'Error listing files (attempt {attempt + 1}/{attempts}): {list_err}')
		            if attempt == attempts - 1:
		                raise
		            time.sleep(delay)


		def extract_archive(archive_path, dest_dir):
		    """Extract the archive, using the 'data' filter where tarfile supports it."""
		    with tarfile.open(archive_path, 'r:gz') as tar:
		        if hasattr(tarfile, 'data_filter'):
		            # Rejects members that would land outside dest_dir.
		            tar.extractall(path=dest_dir, filter='data')
		        else:
		            tar.extractall(path=dest_dir)


		def show_dir(label, directory):
		    log(label)
		    subprocess.run(['ls', '-lA', directory], check=False)


		def restore_from(latest_backup, temp_dir):
		    """Download latest_backup into temp_dir and restore it; exits on failure."""
		    log(f'Downloading {latest_backup} to {temp_dir}...')
		    try:
		        filepath = hf_hub_download(
		            repo_id=repo_id_val,
		            filename=latest_backup,
		            repo_type='dataset',
		            local_dir=temp_dir,
		            token=token_val,
		        )
		    except Exception as download_error:
		        fail(f'ERROR during download: {download_error}')
		    if not (filepath and os.path.exists(filepath)):
		        fail(f'ERROR: Downloaded file path invalid or missing: {filepath}')
		    log(f'Download successful: {filepath}')

		    target_dir = os.environ.get('CLOUDREVE_DIR', '/opt/cloudreve')
		    os.makedirs(target_dir, exist_ok=True)
		    show_dir('Contents before restore:', target_dir)

		    extract_temp_dir = os.path.join(temp_dir, 'extracted_backup')
		    os.makedirs(extract_temp_dir, exist_ok=True)
		    log(f'Extracting archive to {extract_temp_dir}')
		    try:
		        extract_archive(filepath, extract_temp_dir)
		    except Exception as extract_err:
		        fail(f'ERROR during extraction: {extract_err}')
		    log('Extraction complete.')

		    # Verify the whole set before touching the live files.
		    log(f'Checking extracted items in {extract_temp_dir}:')
		    essential_files_present = True
		    for item in ITEMS_TO_RESTORE:
		        extracted_path = os.path.join(extract_temp_dir, item)
		        log(f'Checking for: {extracted_path}')
		        if not os.path.exists(extracted_path):
		            log(f'ERROR: Essential item "{item}" not found in backup.')
		            essential_files_present = False
		    if not essential_files_present:
		        fail('Aborting restore due to missing essential files.')

		    log(f'Deleting existing items in {target_dir}...')
		    for item in ITEMS_TO_RESTORE:
		        target_path = os.path.join(target_dir, item)
		        if not os.path.exists(target_path):
		            continue
		        try:
		            if os.path.isdir(target_path) and not os.path.islink(target_path):
		                shutil.rmtree(target_path)
		            else:
		                os.remove(target_path)
		            log(f'Deleted: {target_path}')
		        except OSError as e:
		            log(f'Error deleting {target_path}: {e}')

		    log(f'Moving extracted items to {target_dir}...')
		    failed_items = []
		    for item in ITEMS_TO_RESTORE:
		        source = os.path.join(extract_temp_dir, item)
		        target = os.path.join(target_dir, item)
		        try:
		            shutil.move(source, target)
		            log(f'Moved: {item}')
		        except Exception as move_err:
		            log(f'Error moving {item}: {move_err}')
		            failed_items.append(item)
		    if failed_items:
		        # A partial restore must not be reported as success, and the flag
		        # file must not be written for it.
		        fail(f'ERROR: Restore incomplete; could not move: {", ".join(failed_items)}')

		    log(f'Successfully restored backup from {latest_backup}')
		    show_dir('Contents after restore:', target_dir)
		    with open(flag_file, 'w') as f:
		        f.write('restored')
		    log(f'Created restore flag file: {flag_file}')
		    sys.exit(0)


		log(f'HF_TOKEN is set: {token_val is not None}')
		log(f'DATASET_ID is set: {repo_id_val is not None}')
		if not repo_id_val:
		    fail('ERROR: DATASET_ID missing.')

		try:
		    log(f'Listing files in Dataset: {repo_id_val}')
		    files = list_files_with_retry()
		    backup_files = sorted(
		        f for f in files
		        if f.startswith(backup_prefix_val) and f.endswith('.tar.gz')
		    )
		    if not backup_files:
		        log('No backup files found. Skipping restore.')
		        sys.exit(0)  # No backup is a normal first-run condition.

		    # Names embed a sortable timestamp, so the last one is the newest.
		    latest_backup = backup_files[-1]
		    log(f'Latest backup file found: {latest_backup}')

		    with tempfile.TemporaryDirectory() as temp_dir:
		        restore_from(latest_backup, temp_dir)
		except Exception as e:
		    log(f'ERROR during download/restore: {str(e)}')
		    traceback.print_exc()
		    sys.exit(1)
		PYTHON_RESTORE
		return $? # Return python exit code
	}

	# --- Background Backup Manager Function ---
	# Wait for Cloudreve, optionally take an initial backup, then back up
	# forever at a fixed interval. Intended to be started as a background job.
	# Globals:   CONFIG_FILE_PATH, DB_FILE_PATH, EXECUTABLE_PATH, CLOUDREVE_DIR,
	#            BACKUP_PREFIX (read; forwarded to perform_backup_once)
	#            SYNC_INTERVAL (read each cycle; sleep duration, default 3600)
	# Arguments: $1 - path of the restore flag file; when it exists the initial
	#                 backup is skipped. The file is removed either way.
	# Outputs:   progress messages on stdout
	# Returns:   never returns; the periodic loop has no exit condition.
	manage_backups_background() {
		local restored_flag_path="$1" # Pass path to the flag file
		local default_interval=3600   # Default to 1 hour (3600 seconds)
		# Digits with an optional fraction and an optional s/m/h/d unit suffix.
		local interval_pattern='^[0-9]+(\.[0-9]+)?[smhd]?$'
		local sync_interval

		echo "[Backup Manager] Process started. Waiting for Cloudreve..."
		wait_for_cloudreve # Wait until Cloudreve port is open

		# Determine if initial backup is needed
		local perform_initial_backup=true
		if [ -f "$restored_flag_path" ]; then
			echo "[Backup Manager] Restore flag found. Skipping initial backup."
			perform_initial_backup=false
		else
			echo "[Backup Manager] No restore flag found. Performing initial backup."
		fi
		# Clean up flag file now that we've checked it
		rm -f "$restored_flag_path"

		# Perform initial backup if needed
		if [ "$perform_initial_backup" = true ]; then
			if perform_backup_once "$CONFIG_FILE_PATH" "$DB_FILE_PATH" "$EXECUTABLE_PATH" "$CLOUDREVE_DIR" "$BACKUP_PREFIX"; then
				echo "[Backup Manager] Initial backup completed."
			else
				# A failed first backup is not fatal; the periodic loop retries.
				echo "[Backup Manager] WARNING: Initial backup failed."
			fi
		fi

		# Start the periodic backup loop
		echo "[Backup Manager] Starting periodic backup loop..."
		while true; do
			sync_interval="${SYNC_INTERVAL:-$default_interval}"
			# A value sleep rejects would make it return immediately and turn
			# this loop into back-to-back backups, so fall back to the default.
			if [[ ! "$sync_interval" =~ $interval_pattern ]]; then
				echo "[Backup Manager] WARNING: Invalid SYNC_INTERVAL '${sync_interval}'. Using ${default_interval} seconds."
				sync_interval="$default_interval"
			fi
			echo "[Backup Manager] Periodic Sync: Next cycle in ${sync_interval} seconds..."
			sleep "$sync_interval"

			echo "[Backup Manager] Periodic Sync: Starting cycle at $(date)"
			perform_backup_once "$CONFIG_FILE_PATH" "$DB_FILE_PATH" "$EXECUTABLE_PATH" "$CLOUDREVE_DIR" "$BACKUP_PREFIX"
			# Log completion, ignore errors for the loop to continue
			echo "[Backup Manager] Periodic Sync: Cycle finished."
		done
	}


	# --- Main Script Execution ---
	# Flow: activate the Python venv, restore the newest backup (if any), start
	# the backup manager as a background job, then replace this shell with the
	# Cloudreve process.

	# Activate Python environment
	echo "Activating Python venv..."
	source /opt/venv/bin/activate

	# Define paths
	CLOUDREVE_DIR="/opt/cloudreve"
	BACKUP_PREFIX="cloudreve_backup"
	CONFIG_FILE_PATH="${CLOUDREVE_DIR}/config.ini"
	DB_FILE_PATH="${CLOUDREVE_DIR}/cloudreve.db"
	EXECUTABLE_PATH="${CLOUDREVE_DIR}/cloudreve"
	RESTORE_FLAG_FILE="/tmp/backup_restored.flag" # Define flag file path

	# The restore script reads CLOUDREVE_DIR from the process environment.
	export CLOUDREVE_DIR

	# 1. Attempt Initial Restore (sets flag file on success)
	echo "--- Step 1: Attempting Initial Restore ---"
	download_latest_backup # Creates $RESTORE_FLAG_FILE on success
	restore_exit_code=$?

	if [ "$restore_exit_code" -ne 0 ]; then
		echo "CRITICAL: Backup restoration attempt failed (Exit code: $restore_exit_code). Exiting."
		# Do not leave a flag behind from a failed attempt.
		rm -f "$RESTORE_FLAG_FILE"
		exit 1
	elif [ -f "$RESTORE_FLAG_FILE" ]; then
		echo "--- Step 1 Result: Restore successful. ---"
	else
		echo "--- Step 1 Result: Restore skipped (no backup found). ---"
	fi
	# Flag file will be checked and removed by the background manager

	# 2. Start Background Backup Manager
	# It will wait for Cloudreve, check the flag file, do initial backup if needed, then loop.
	echo "--- Step 2: Starting Background Backup Manager ---"
	manage_backups_background "$RESTORE_FLAG_FILE" & # Pass flag file path
	backup_manager_pid=$!

	# 3. Start Cloudreve Main Process using exec
	# On success exec replaces this shell, so Cloudreve takes over this PID.
	echo "--- Step 3: Starting Cloudreve Application (using exec) ---"
	# A non-interactive bash exits immediately when exec fails unless execfail
	# is set; enable it so the cleanup below can actually run.
	shopt -s execfail
	exec "$EXECUTABLE_PATH" -c "$CONFIG_FILE_PATH"

	# --- Code below only runs if exec fails ---
	exec_failed_code=$?
	echo "CRITICAL: Failed to execute Cloudreve (Exit code: $exec_failed_code). Terminating background jobs..."
	# Stop the background backup manager; it may already be gone, so ignore errors.
	kill "$backup_manager_pid" 2>/dev/null
	# Attempt to kill Aria2 if it was started by CMD? This is harder to manage reliably here.
	exit "$exec_failed_code"