#!/bin/bash
#
# Data persistence single-file script (v4.0).
#
# Archives configured paths, uploads them to a Hugging Face dataset via the
# sibling hf_persistence.py module, and restores the latest archive on startup.
#
# Commands: archive | restore [NAME] | restore-sync [NAME] | list | daemon | start
# Required env: HF_TOKEN, DATASET_ID. See set_default_configuration for the rest.

set -euo pipefail # Strict error handling

# Set timezone to UTC for consistent timestamp handling across environments
export TZ=UTC

# Default configuration file path
DEFAULT_CONFIG_FILE="${CONFIG_FILE:-/home/user/config/persistence.conf}"

# Logging functions.
# log LEVEL MSG... — timestamped line to stdout and (appended) to $LOG_FILE.
log() {
  local level="$1"
  shift
  # Ensure log directory exists
  mkdir -p "$(dirname "${LOG_FILE:-/home/user/log/persistence.log}")"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $*" | tee -a "${LOG_FILE:-/home/user/log/persistence.log}"
}

log_info() { log "INFO" "$@"; }
log_warn() { log "WARN" "$@"; }
log_error() { log "ERROR" "$@"; }

# Load configuration file (shell-variable format, sourced into this process).
# Arguments: $1 - config file path (default: $DEFAULT_CONFIG_FILE)
# Returns: 0 even when the file is missing (defaults are used instead).
load_configuration() {
  local config_file="${1:-$DEFAULT_CONFIG_FILE}"
  if [[ ! -f "$config_file" ]]; then
    log_warn "Configuration file does not exist: $config_file, using default configuration"
    return 0
  fi
  log_info "Loading configuration file: $config_file"
  # Read shell variable format configuration file
  # shellcheck disable=SC1090 — path is user-provided by design
  source "$config_file"
}

# Set default configuration.
# Exports every knob so child processes (python3 handlers, APP_COMMAND) see them.
# NOTE(review): most defaults are empty strings; the inline comments suggest the
# intended defaults (e.g. SYNC_INTERVAL=7200, INITIAL_BACKUP_DELAY=300) — confirm
# against the deployment config. COMPRESSION_LEVEL is exported but not used here.
set_default_configuration() {
  # Core configuration
  export HF_TOKEN="${HF_TOKEN:-}"
  export DATASET_ID="${DATASET_ID:-}"
  export ARCHIVE_PATHS="${ARCHIVE_PATHS:-}"
  export RESTORE_PATH="${RESTORE_PATH:-./}"
  # Sync configuration
  export SYNC_INTERVAL="${SYNC_INTERVAL:-}" # 2 hours
  export MAX_ARCHIVES="${MAX_ARCHIVES:-}"
  export COMPRESSION_LEVEL="${COMPRESSION_LEVEL:-}"
  export INITIAL_BACKUP_DELAY="${INITIAL_BACKUP_DELAY:-}" # 5 minutes
  # File configuration
  export ARCHIVE_PREFIX="${ARCHIVE_PREFIX:-}"
  export ARCHIVE_EXTENSION="${ARCHIVE_EXTENSION:-}"
  export EXCLUDE_PATTERNS="${EXCLUDE_PATTERNS:-}"
  # Application configuration
  export APP_COMMAND="${APP_COMMAND:-}"
  export ENABLE_AUTO_RESTORE="${ENABLE_AUTO_RESTORE:-}"
  export ENABLE_AUTO_SYNC="${ENABLE_AUTO_SYNC:-}"
  # Synchronous restore configuration
  export FORCE_SYNC_RESTORE="${FORCE_SYNC_RESTORE:-}"
  export ENABLE_INTEGRITY_CHECK="${ENABLE_INTEGRITY_CHECK:-}"
  # Logging configuration
  export LOG_FILE="${LOG_FILE:-}"
  export LOG_LEVEL="${LOG_LEVEL:-}"
}

# Validate required environment variables.
# Returns: 0 when HF_TOKEN and DATASET_ID are set (also exports
# HUGGING_FACE_HUB_TOKEN); 1 otherwise.
validate_configuration() {
  local errors=0
  if [[ -z "$HF_TOKEN" ]]; then
    log_error "Missing required environment variable: HF_TOKEN"
    # NOTE: not ((errors++)) — that returns status 1 when errors is 0 and
    # would abort the script under `set -e`.
    errors=$((errors + 1))
  fi
  if [[ -z "$DATASET_ID" ]]; then
    log_error "Missing required environment variable: DATASET_ID"
    errors=$((errors + 1))
  fi
  if [[ $errors -gt 0 ]]; then
    log_error "Configuration validation failed, starting application in non-persistent mode"
    return 1
  fi
  # Set Hugging Face authentication
  export HUGGING_FACE_HUB_TOKEN="$HF_TOKEN"
  log_info "Configuration validation successful"
  return 0
}

# Create archive file from the comma-separated $ARCHIVE_PATHS.
# Outputs: path of the created archive on stdout (all logging goes to stderr
#          so command substitution captures only the path).
# Returns: 0 on success (tar exit 0 or 1 is accepted), 1 on failure.
create_archive() {
  local timestamp
  timestamp=$(date +%Y%m%d_%H%M%S)
  local archive_file="${ARCHIVE_PREFIX}_${timestamp}.${ARCHIVE_EXTENSION}"
  # Use user-owned directory instead of /tmp
  local temp_dir="/home/user/temp"
  mkdir -p "$temp_dir"
  local temp_archive="$temp_dir/${archive_file}"
  log_info "Starting archive creation: $archive_file" >&2

  # Build exclude arguments - only exclude files that would negatively impact
  # HuggingFace datasets backup. Built as an argv array (not an eval'd string)
  # so patterns with shell metacharacters cannot be re-interpreted.
  local exclude_args=()
  local default_excludes="__pycache__,*.tmp,*/temp,*/cache,*/.cache,*/log,*/logs"
  local combined_patterns="${EXCLUDE_PATTERNS:-},${default_excludes}"
  local patterns pattern
  IFS=',' read -ra patterns <<< "$combined_patterns"
  for pattern in "${patterns[@]}"; do
    pattern="${pattern// /}" # Remove spaces
    if [[ -n "$pattern" ]]; then
      exclude_args+=("--exclude=${pattern}")
    fi
  done

  # Tar options handle file changes and permission issues gracefully;
  # --mtime uses the current UTC epoch for consistent timestamp handling.
  local tar_cmd=(
    tar -czf "$temp_archive"
    --ignore-failed-read
    --warning=no-file-changed
    --warning=no-file-removed
    "--mtime=@$(date +%s)"
    "${exclude_args[@]}"
  )

  # Append each existing archive path to the command line.
  local archive_paths_array path
  local valid_paths=()
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -e "$path" ]]; then
      # Check if directory is empty; tar needs at least one entry per path
      if [[ -d "$path" ]] && [[ -z "$(ls -A "$path" 2>/dev/null)" ]]; then
        log_warn "Directory is empty, creating placeholder file: $path" >&2
        echo "# Placeholder file for persistence backup" > "$path/.persistence_placeholder"
      fi
      tar_cmd+=("$path")
      valid_paths+=("$path")
    else
      log_warn "Archive path does not exist, skipping: $path" >&2
    fi
  done

  # Check if there are valid paths
  if [[ ${#valid_paths[@]} -eq 0 ]]; then
    log_error "No valid archive paths found" >&2
    return 1
  fi

  log_info "Executing archive command: ${tar_cmd[*]}" >&2

  # Execute tar and capture its exit code without tripping `set -e`.
  local tar_exit_code=0
  "${tar_cmd[@]}" >&2 || tar_exit_code=$?

  # Check if archive was created successfully (tar exit code 0 or 1 is acceptable)
  # Exit code 1 means some files changed during archiving, which is normal
  if [[ $tar_exit_code -eq 0 || $tar_exit_code -eq 1 ]] && [[ -f "$temp_archive" ]]; then
    log_info "Archive file created successfully: $temp_archive" >&2
    if [[ $tar_exit_code -eq 1 ]]; then
      log_warn "Some files changed during archiving (this is normal for active applications)" >&2
    fi
    echo "$temp_archive"
    return 0
  else
    log_error "Archive file creation failed with exit code: $tar_exit_code" >&2
    return 1
  fi
}

# Call Python upload handler with pre-upload cleanup.
# Arguments: archive_file filename dataset_id backup_prefix backup_extension max_backups token
run_upload_handler() {
  local archive_file="$1"
  local filename="$2"
  local dataset_id="$3"
  local backup_prefix="$4"
  local backup_extension="$5"
  local max_backups="$6"
  local token="$7"
  # Get script directory for relative imports
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Call the standalone Python module (now with pre-upload cleanup logic)
  python3 "${script_dir}/hf_persistence.py" upload \
    --token "$token" \
    --dataset-id "$dataset_id" \
    --archive-file "$archive_file" \
    --filename "$filename" \
    --archive-prefix "$backup_prefix" \
    --archive-extension "$backup_extension" \
    --max-archives "$max_backups"
}

# Upload archive to Hugging Face.
# Arguments: $1 - local archive file path
upload_archive() {
  local archive_file="$1"
  local filename
  filename=$(basename "$archive_file")
  log_info "Starting archive upload: $filename"
  # Call embedded Python handler
  if run_upload_handler "$archive_file" "$filename" "$DATASET_ID" "$ARCHIVE_PREFIX" "$ARCHIVE_EXTENSION" "$MAX_ARCHIVES" "$HF_TOKEN"; then
    log_info "Archive upload completed"
    return 0
  else
    log_error "Archive upload failed"
    return 1
  fi
}

# Perform one archive operation (create + upload, or create-only in test mode).
perform_archive() {
  log_info "Starting archive operation"
  local archive_file
  if archive_file=$(create_archive); then
    # Check if in test mode (HF_TOKEN is test_token)
    if [[ "$HF_TOKEN" == "test_token" ]]; then
      log_info "Test mode: Archive created successfully, skipping upload"
      log_info "Archive file: $archive_file"
      ls -la "$archive_file"
      log_info "Test mode: Keeping archive file for inspection"
    else
      if upload_archive "$archive_file"; then
        log_info "Archive operation completed successfully"
      else
        log_error "Archive upload failed"
      fi
      # Clean up temporary files
      rm -f "$archive_file"
    fi
  else
    log_error "Archive creation failed"
    return 1
  fi
}

# Sync daemon: periodic backup loop, every $SYNC_INTERVAL seconds.
sync_daemon() {
  log_info "Starting sync daemon, interval: ${SYNC_INTERVAL} seconds"
  # Initial delay to allow application to fully start
  local initial_delay="${INITIAL_BACKUP_DELAY:-300}"
  log_info "Waiting ${initial_delay} seconds for application to fully initialize before first backup"
  sleep "$initial_delay"
  while true; do
    # A single failed backup must not kill the daemon under `set -e`;
    # log it and retry at the next interval.
    perform_archive || log_error "Archive operation failed; will retry at next interval"
    log_info "Next sync will execute in ${SYNC_INTERVAL} seconds"
    sleep "$SYNC_INTERVAL"
  done
}

# Call Python archive lister.
# Arguments: dataset_id backup_prefix backup_extension token
run_archive_lister() {
  local dataset_id="$1"
  local backup_prefix="$2"
  local backup_extension="$3"
  local token="$4"
  # Get script directory for relative imports
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Call the standalone Python module
  python3 "${script_dir}/hf_persistence.py" list \
    --token "$token" \
    --dataset-id "$dataset_id" \
    --archive-prefix "$backup_prefix" \
    --archive-extension "$backup_extension"
}

# List available archives (stdout is parsed by restore_archive).
list_archives() {
  log_info "Getting available archive list"
  # Check if in test mode (HF_TOKEN is test_token)
  if [[ "$HF_TOKEN" == "test_token" ]]; then
    log_info "Test mode: Simulating empty archive list"
    echo "No archive files found"
    return 0
  fi
  # Call embedded Python handler
  run_archive_lister "$DATASET_ID" "$ARCHIVE_PREFIX" "$ARCHIVE_EXTENSION" "$HF_TOKEN"
}

# Call Python download handler.
# Arguments: backup_name dataset_id restore_path token
run_download_handler() {
  local backup_name="$1"
  local dataset_id="$2"
  local restore_path="$3"
  local token="$4"
  # Get script directory for relative imports
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Call the standalone Python module
  python3 "${script_dir}/hf_persistence.py" restore \
    --token "$token" \
    --dataset-id "$dataset_id" \
    --archive-name "$backup_name" \
    --restore-path "$restore_path"
}

# Verify data integrity after restoration.
# Checks that each path in $ARCHIVE_PATHS exists and (for directories) is
# readable and writable. No-op unless ENABLE_INTEGRITY_CHECK=true.
verify_data_integrity() {
  # Check if integrity verification is enabled
  if [[ "$ENABLE_INTEGRITY_CHECK" != "true" ]]; then
    log_info "Data integrity verification is disabled, skipping"
    return 0
  fi
  local verification_failed=0
  log_info "Starting data integrity verification"
  # Check if critical directories exist and are accessible
  local archive_paths_array path
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -n "$path" ]]; then
      if [[ -e "$path" ]]; then
        log_info "✓ Verified path exists: $path"
        # Check if directory is readable
        if [[ -d "$path" ]] && [[ ! -r "$path" ]]; then
          log_error "✗ Directory exists but is not readable: $path"
          # Plain arithmetic assignment: ((…++)) would return 1 under set -e.
          verification_failed=$((verification_failed + 1))
        fi
        # Check if directory is writable (for future operations)
        if [[ -d "$path" ]] && [[ ! -w "$path" ]]; then
          log_error "✗ Directory exists but is not writable: $path"
          verification_failed=$((verification_failed + 1))
        fi
      else
        # This might be acceptable for first run
        log_warn "⚠ Path does not exist after restoration: $path"
      fi
    fi
  done
  # Additional integrity checks can be added here
  # For example, checking for specific configuration files
  if [[ $verification_failed -gt 0 ]]; then
    log_error "Data integrity verification failed with $verification_failed errors"
    return 1
  else
    log_info "✓ Data integrity verification passed"
    return 0
  fi
}

# Create the directory structure for a first run (no archives available yet).
# Shared by both "empty list" branches of restore_archive.
initialize_first_run_paths() {
  log_info "Force restore requested but no archives available - this is normal for first run"
  log_info "Initializing fresh environment for first-time startup"
  # Create necessary directory structure for first run
  local archive_paths_array path
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -n "$path" ]] && [[ ! -e "$path" ]]; then
      log_info "Creating directory for first run: $path"
      mkdir -p "$path" || log_warn "Failed to create directory: $path"
    fi
  done
  log_info "✓ First-time environment initialization completed"
}

# Restore specified archive with integrity verification.
# Arguments: $1 - archive name or "latest" (default)
#            $2 - force_restore: "true" initializes dirs when no archive exists
# Returns: 0 on success or acceptable first-run, 1 on failure.
restore_archive() {
  local archive_name="${1:-latest}"
  local force_restore="${2:-false}"
  log_info "Starting synchronous archive restoration: $archive_name"
  # If latest, get the latest archive name first
  if [[ "$archive_name" == "latest" ]]; then
    local archive_list_output
    if archive_list_output=$(list_archives 2>&1); then
      # `|| true`: no LATEST_BACKUP line means an empty name, and a failed
      # grep must not abort the script under set -e/pipefail.
      archive_name=$(echo "$archive_list_output" | grep "LATEST_BACKUP:" | cut -d: -f2 || true)
      if [[ -z "$archive_name" ]]; then
        log_info "No archive files found, this appears to be the first run"
        if [[ "$force_restore" == "true" ]]; then
          initialize_first_run_paths
          return 0
        else
          log_info "Continuing with fresh start (no archives available)"
          return 0
        fi
      fi
    else
      # Check if output contains "No archive files found"
      if echo "$archive_list_output" | grep -q "No archive files found"; then
        log_info "No archive files found, this appears to be the first run"
        if [[ "$force_restore" == "true" ]]; then
          initialize_first_run_paths
          return 0
        else
          log_info "Continuing with fresh start (no archives available)"
          return 0
        fi
      else
        log_error "Failed to get archive list: $archive_list_output"
        return 1
      fi
    fi
  fi

  log_info "Restoring archive file: $archive_name"
  # Call embedded Python handler
  if run_download_handler "$archive_name" "$DATASET_ID" "$RESTORE_PATH" "$HF_TOKEN"; then
    log_info "Archive extraction completed, verifying data integrity..."
    # Verify data integrity after restoration
    if verify_data_integrity; then
      log_info "✓ Archive restoration completed successfully with integrity verification"
      return 0
    else
      log_error "✗ Data integrity verification failed after restoration"
      return 1
    fi
  else
    log_error "✗ Archive restoration failed during extraction"
    return 1
  fi
}

# Main program entry: parse arguments, load config, dispatch on command.
main() {
  local command="start"
  local config_file="$DEFAULT_CONFIG_FILE"
  local verbose=false
  local no_restore=false
  local no_sync=false
  local restore_target=""

  # Parse command line arguments
  while [[ $# -gt 0 ]]; do
    case $1 in
      -c|--config)
        config_file="$2"
        shift 2
        ;;
      -v|--verbose)
        verbose=true
        shift
        ;;
      --no-restore)
        no_restore=true
        shift
        ;;
      --no-sync)
        no_sync=true
        shift
        ;;
      archive|restore|restore-sync|list|daemon|start)
        command="$1"
        shift
        ;;
      *)
        if [[ ("$command" == "restore" || "$command" == "restore-sync") && -z "$restore_target" ]]; then
          # Archive name parameter for restore and restore-sync commands
          restore_target="$1"
          shift
        else
          log_error "Unknown parameter: $1"
          exit 1
        fi
        ;;
    esac
  done

  # Load configuration
  load_configuration "$config_file"
  set_default_configuration

  # Set log level
  if [[ "$verbose" == "true" ]]; then
    export LOG_LEVEL="DEBUG"
  fi

  log_info "=== Data Persistence Single File Script Startup ==="
  log_info "Version: 4.0"
  log_info "Command: $command"
  log_info "Configuration file: $config_file"

  # Execute corresponding operation based on command
  case $command in
    archive)
      if validate_configuration; then
        perform_archive
      else
        exit 1
      fi
      ;;
    restore)
      if validate_configuration; then
        restore_archive "${restore_target:-latest}"
      else
        exit 1
      fi
      ;;
    restore-sync)
      # Synchronous restore with mandatory integrity verification
      if validate_configuration; then
        log_info "=== SYNCHRONOUS RESTORE MODE ==="
        log_info "This is a blocking operation that must complete successfully"
        if restore_archive "${restore_target:-latest}" "true"; then
          log_info "✓ Synchronous restore completed successfully"
          exit 0
        else
          log_error "✗ Synchronous restore failed"
          log_error "Operation aborted to prevent data inconsistency"
          exit 1
        fi
      else
        log_error "Configuration validation failed"
        exit 1
      fi
      ;;
    list)
      if validate_configuration; then
        list_archives
      else
        exit 1
      fi
      ;;
    daemon)
      if validate_configuration; then
        sync_daemon
      else
        exit 1
      fi
      ;;
    start)
      # Application startup mode with synchronous data restoration
      if validate_configuration; then
        # Synchronous auto restore - behavior depends on FORCE_SYNC_RESTORE
        if [[ "$ENABLE_AUTO_RESTORE" == "true" && "$no_restore" == "false" ]]; then
          log_info "=== SYNCHRONOUS DATA RESTORATION PHASE ==="
          log_info "Performing synchronous auto restore - this is a blocking operation"
          log_info "Force sync restore: $FORCE_SYNC_RESTORE"
          log_info "Integrity check: $ENABLE_INTEGRITY_CHECK"
          if restore_archive "latest"; then
            log_info "✓ Synchronous data restoration completed successfully"
            log_info "All dependent services can now start safely"
          else
            if [[ "$FORCE_SYNC_RESTORE" == "true" ]]; then
              log_error "✗ Synchronous data restoration failed"
              log_error "FORCE_SYNC_RESTORE=true: Service startup will be aborted to prevent data inconsistency"
              log_error "Please check the logs and fix any issues before restarting"
              exit 1
            else
              log_warn "✗ Synchronous data restoration failed"
              log_warn "FORCE_SYNC_RESTORE=false: Continuing with service startup (legacy behavior)"
              log_warn "This may result in data inconsistency"
            fi
          fi
          log_info "=== DATA RESTORATION PHASE COMPLETED ==="
        else
          log_info "Auto restore is disabled, skipping data restoration"
        fi
        # Start sync daemon only after successful restoration
        if [[ "$ENABLE_AUTO_SYNC" == "true" && "$no_sync" == "false" ]]; then
          log_info "Starting sync daemon (data restoration completed)"
          sync_daemon &
          sync_pid=$!
          log_info "Sync daemon PID: $sync_pid"
        fi
      else
        log_warn "Configuration validation failed, starting application in non-persistent mode"
        log_warn "No data restoration will be performed"
      fi
      # Start main application only after all restoration is complete
      log_info "=== STARTING MAIN APPLICATION ==="
      log_info "All data restoration and verification completed"
      log_info "Starting main application: $APP_COMMAND"
      # shellcheck disable=SC2086 — APP_COMMAND is intentionally word-split
      # into a command plus its arguments.
      exec $APP_COMMAND
      ;;
    *)
      log_error "Unknown command: $command"
      exit 1
      ;;
  esac
}

# Script entry point (only when executed, not when sourced)
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi