#!/bin/bash
#
# Data persistence single-file script (v4.0).
#
# Archives configured paths, uploads them to a Hugging Face dataset via the
# sibling hf_persistence.py module, and restores the latest archive on startup.
#
# Commands: archive | restore [NAME] | restore-sync [NAME] | list | daemon | start
# Required env: HF_TOKEN, DATASET_ID. See set_default_configuration for the rest.

set -euo pipefail # Strict error handling

# Set timezone to UTC for consistent timestamp handling across environments
export TZ=UTC

# Default configuration file path
DEFAULT_CONFIG_FILE="${CONFIG_FILE:-/home/user/config/persistence.conf}"

# Logging functions.
# log LEVEL MSG... — timestamped line to stdout and (appended) to $LOG_FILE.
log() {
  local level="$1"
  shift
  # Ensure log directory exists
  mkdir -p "$(dirname "${LOG_FILE:-/home/user/log/persistence.log}")"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $*" | tee -a "${LOG_FILE:-/home/user/log/persistence.log}"
}

log_info() { log "INFO" "$@"; }
log_warn() { log "WARN" "$@"; }
log_error() { log "ERROR" "$@"; }

# Load configuration file (shell-variable format, sourced into this process).
# Arguments: $1 - config file path (default: $DEFAULT_CONFIG_FILE)
# Returns: 0 even when the file is missing (defaults are used instead).
load_configuration() {
  local config_file="${1:-$DEFAULT_CONFIG_FILE}"
  if [[ ! -f "$config_file" ]]; then
    log_warn "Configuration file does not exist: $config_file, using default configuration"
    return 0
  fi
  log_info "Loading configuration file: $config_file"
  # Read shell variable format configuration file
  # shellcheck disable=SC1090 — path is user-provided by design
  source "$config_file"
}

# Set default configuration.
# Exports every knob so child processes (python3 handlers, APP_COMMAND) see them.
# NOTE(review): most defaults are empty strings; the inline comments suggest the
# intended defaults (e.g. SYNC_INTERVAL=7200, INITIAL_BACKUP_DELAY=300) — confirm
# against the deployment config. COMPRESSION_LEVEL is exported but not used here.
set_default_configuration() {
  # Core configuration
  export HF_TOKEN="${HF_TOKEN:-}"
  export DATASET_ID="${DATASET_ID:-}"
  export ARCHIVE_PATHS="${ARCHIVE_PATHS:-}"
  export RESTORE_PATH="${RESTORE_PATH:-./}"
  # Sync configuration
  export SYNC_INTERVAL="${SYNC_INTERVAL:-}" # 2 hours
  export MAX_ARCHIVES="${MAX_ARCHIVES:-}"
  export COMPRESSION_LEVEL="${COMPRESSION_LEVEL:-}"
  export INITIAL_BACKUP_DELAY="${INITIAL_BACKUP_DELAY:-}" # 5 minutes
  # File configuration
  export ARCHIVE_PREFIX="${ARCHIVE_PREFIX:-}"
  export ARCHIVE_EXTENSION="${ARCHIVE_EXTENSION:-}"
  export EXCLUDE_PATTERNS="${EXCLUDE_PATTERNS:-}"
  # Application configuration
  export APP_COMMAND="${APP_COMMAND:-}"
  export ENABLE_AUTO_RESTORE="${ENABLE_AUTO_RESTORE:-}"
  export ENABLE_AUTO_SYNC="${ENABLE_AUTO_SYNC:-}"
  # Synchronous restore configuration
  export FORCE_SYNC_RESTORE="${FORCE_SYNC_RESTORE:-}"
  export ENABLE_INTEGRITY_CHECK="${ENABLE_INTEGRITY_CHECK:-}"
  # Logging configuration
  export LOG_FILE="${LOG_FILE:-}"
  export LOG_LEVEL="${LOG_LEVEL:-}"
}

# Validate required environment variables.
# Returns: 0 when HF_TOKEN and DATASET_ID are set (also exports
# HUGGING_FACE_HUB_TOKEN); 1 otherwise.
validate_configuration() {
  local errors=0
  if [[ -z "$HF_TOKEN" ]]; then
    log_error "Missing required environment variable: HF_TOKEN"
    # NOTE: not ((errors++)) — that returns status 1 when errors is 0 and
    # would abort the script under `set -e`.
    errors=$((errors + 1))
  fi
  if [[ -z "$DATASET_ID" ]]; then
    log_error "Missing required environment variable: DATASET_ID"
    errors=$((errors + 1))
  fi
  if [[ $errors -gt 0 ]]; then
    log_error "Configuration validation failed, starting application in non-persistent mode"
    return 1
  fi
  # Set Hugging Face authentication
  export HUGGING_FACE_HUB_TOKEN="$HF_TOKEN"
  log_info "Configuration validation successful"
  return 0
}

# Create archive file from the comma-separated $ARCHIVE_PATHS.
# Outputs: path of the created archive on stdout (all logging goes to stderr
#          so command substitution captures only the path).
# Returns: 0 on success (tar exit 0 or 1 is accepted), 1 on failure.
create_archive() {
  local timestamp
  timestamp=$(date +%Y%m%d_%H%M%S)
  local archive_file="${ARCHIVE_PREFIX}_${timestamp}.${ARCHIVE_EXTENSION}"
  # Use user-owned directory instead of /tmp
  local temp_dir="/home/user/temp"
  mkdir -p "$temp_dir"
  local temp_archive="$temp_dir/${archive_file}"
  log_info "Starting archive creation: $archive_file" >&2

  # Build exclude arguments - only exclude files that would negatively impact
  # HuggingFace datasets backup. Built as an argv array (not an eval'd string)
  # so patterns with shell metacharacters cannot be re-interpreted.
  local exclude_args=()
  local default_excludes="__pycache__,*.tmp,*/temp,*/cache,*/.cache,*/log,*/logs"
  local combined_patterns="${EXCLUDE_PATTERNS:-},${default_excludes}"
  local patterns pattern
  IFS=',' read -ra patterns <<< "$combined_patterns"
  for pattern in "${patterns[@]}"; do
    pattern="${pattern// /}" # Remove spaces
    if [[ -n "$pattern" ]]; then
      exclude_args+=("--exclude=${pattern}")
    fi
  done

  # Tar options handle file changes and permission issues gracefully;
  # --mtime uses the current UTC epoch for consistent timestamp handling.
  local tar_cmd=(
    tar -czf "$temp_archive"
    --ignore-failed-read
    --warning=no-file-changed
    --warning=no-file-removed
    "--mtime=@$(date +%s)"
    "${exclude_args[@]}"
  )

  # Append each existing archive path to the command line.
  local archive_paths_array path
  local valid_paths=()
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -e "$path" ]]; then
      # Check if directory is empty; tar needs at least one entry per path
      if [[ -d "$path" ]] && [[ -z "$(ls -A "$path" 2>/dev/null)" ]]; then
        log_warn "Directory is empty, creating placeholder file: $path" >&2
        echo "# Placeholder file for persistence backup" > "$path/.persistence_placeholder"
      fi
      tar_cmd+=("$path")
      valid_paths+=("$path")
    else
      log_warn "Archive path does not exist, skipping: $path" >&2
    fi
  done

  # Check if there are valid paths
  if [[ ${#valid_paths[@]} -eq 0 ]]; then
    log_error "No valid archive paths found" >&2
    return 1
  fi

  log_info "Executing archive command: ${tar_cmd[*]}" >&2

  # Execute tar and capture its exit code without tripping `set -e`.
  local tar_exit_code=0
  "${tar_cmd[@]}" >&2 || tar_exit_code=$?

  # Check if archive was created successfully (tar exit code 0 or 1 is acceptable)
  # Exit code 1 means some files changed during archiving, which is normal
  if [[ $tar_exit_code -eq 0 || $tar_exit_code -eq 1 ]] && [[ -f "$temp_archive" ]]; then
    log_info "Archive file created successfully: $temp_archive" >&2
    if [[ $tar_exit_code -eq 1 ]]; then
      log_warn "Some files changed during archiving (this is normal for active applications)" >&2
    fi
    echo "$temp_archive"
    return 0
  else
    log_error "Archive file creation failed with exit code: $tar_exit_code" >&2
    return 1
  fi
}

# Call Python upload handler with pre-upload cleanup.
# Arguments: archive_file filename dataset_id backup_prefix backup_extension max_backups token
run_upload_handler() {
  local archive_file="$1"
  local filename="$2"
  local dataset_id="$3"
  local backup_prefix="$4"
  local backup_extension="$5"
  local max_backups="$6"
  local token="$7"
  # Get script directory for relative imports
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Call the standalone Python module (now with pre-upload cleanup logic)
  python3 "${script_dir}/hf_persistence.py" upload \
    --token "$token" \
    --dataset-id "$dataset_id" \
    --archive-file "$archive_file" \
    --filename "$filename" \
    --archive-prefix "$backup_prefix" \
    --archive-extension "$backup_extension" \
    --max-archives "$max_backups"
}

# Upload archive to Hugging Face.
# Arguments: $1 - local archive file path
upload_archive() {
  local archive_file="$1"
  local filename
  filename=$(basename "$archive_file")
  log_info "Starting archive upload: $filename"
  # Call embedded Python handler
  if run_upload_handler "$archive_file" "$filename" "$DATASET_ID" "$ARCHIVE_PREFIX" "$ARCHIVE_EXTENSION" "$MAX_ARCHIVES" "$HF_TOKEN"; then
    log_info "Archive upload completed"
    return 0
  else
    log_error "Archive upload failed"
    return 1
  fi
}

# Perform one archive operation (create + upload, or create-only in test mode).
perform_archive() {
  log_info "Starting archive operation"
  local archive_file
  if archive_file=$(create_archive); then
    # Check if in test mode (HF_TOKEN is test_token)
    if [[ "$HF_TOKEN" == "test_token" ]]; then
      log_info "Test mode: Archive created successfully, skipping upload"
      log_info "Archive file: $archive_file"
      ls -la "$archive_file"
      log_info "Test mode: Keeping archive file for inspection"
    else
      if upload_archive "$archive_file"; then
        log_info "Archive operation completed successfully"
      else
        log_error "Archive upload failed"
      fi
      # Clean up temporary files
      rm -f "$archive_file"
    fi
  else
    log_error "Archive creation failed"
    return 1
  fi
}

# Sync daemon: periodic backup loop, every $SYNC_INTERVAL seconds.
sync_daemon() {
  log_info "Starting sync daemon, interval: ${SYNC_INTERVAL} seconds"
  # Initial delay to allow application to fully start
  local initial_delay="${INITIAL_BACKUP_DELAY:-300}"
  log_info "Waiting ${initial_delay} seconds for application to fully initialize before first backup"
  sleep "$initial_delay"
  while true; do
    # A single failed backup must not kill the daemon under `set -e`;
    # log it and retry at the next interval.
    perform_archive || log_error "Archive operation failed; will retry at next interval"
    log_info "Next sync will execute in ${SYNC_INTERVAL} seconds"
    sleep "$SYNC_INTERVAL"
  done
}

# Call Python archive lister.
# Arguments: dataset_id backup_prefix backup_extension token
run_archive_lister() {
  local dataset_id="$1"
  local backup_prefix="$2"
  local backup_extension="$3"
  local token="$4"
  # Get script directory for relative imports
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Call the standalone Python module
  python3 "${script_dir}/hf_persistence.py" list \
    --token "$token" \
    --dataset-id "$dataset_id" \
    --archive-prefix "$backup_prefix" \
    --archive-extension "$backup_extension"
}

# List available archives (stdout is parsed by restore_archive).
list_archives() {
  log_info "Getting available archive list"
  # Check if in test mode (HF_TOKEN is test_token)
  if [[ "$HF_TOKEN" == "test_token" ]]; then
    log_info "Test mode: Simulating empty archive list"
    echo "No archive files found"
    return 0
  fi
  # Call embedded Python handler
  run_archive_lister "$DATASET_ID" "$ARCHIVE_PREFIX" "$ARCHIVE_EXTENSION" "$HF_TOKEN"
}

# Call Python download handler.
# Arguments: backup_name dataset_id restore_path token
run_download_handler() {
  local backup_name="$1"
  local dataset_id="$2"
  local restore_path="$3"
  local token="$4"
  # Get script directory for relative imports
  local script_dir
  script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  # Call the standalone Python module
  python3 "${script_dir}/hf_persistence.py" restore \
    --token "$token" \
    --dataset-id "$dataset_id" \
    --archive-name "$backup_name" \
    --restore-path "$restore_path"
}

# Verify data integrity after restoration.
# Checks that each path in $ARCHIVE_PATHS exists and (for directories) is
# readable and writable. No-op unless ENABLE_INTEGRITY_CHECK=true.
verify_data_integrity() {
  # Check if integrity verification is enabled
  if [[ "$ENABLE_INTEGRITY_CHECK" != "true" ]]; then
    log_info "Data integrity verification is disabled, skipping"
    return 0
  fi
  local verification_failed=0
  log_info "Starting data integrity verification"
  # Check if critical directories exist and are accessible
  local archive_paths_array path
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -n "$path" ]]; then
      if [[ -e "$path" ]]; then
        log_info "✓ Verified path exists: $path"
        # Check if directory is readable
        if [[ -d "$path" ]] && [[ ! -r "$path" ]]; then
          log_error "✗ Directory exists but is not readable: $path"
          # Plain arithmetic assignment: ((…++)) would return 1 under set -e.
          verification_failed=$((verification_failed + 1))
        fi
        # Check if directory is writable (for future operations)
        if [[ -d "$path" ]] && [[ ! -w "$path" ]]; then
          log_error "✗ Directory exists but is not writable: $path"
          verification_failed=$((verification_failed + 1))
        fi
      else
        # This might be acceptable for first run
        log_warn "⚠ Path does not exist after restoration: $path"
      fi
    fi
  done
  # Additional integrity checks can be added here
  # For example, checking for specific configuration files
  if [[ $verification_failed -gt 0 ]]; then
    log_error "Data integrity verification failed with $verification_failed errors"
    return 1
  else
    log_info "✓ Data integrity verification passed"
    return 0
  fi
}

# Create the directory structure for a first run (no archives available yet).
# Shared by both "empty list" branches of restore_archive.
initialize_first_run_paths() {
  log_info "Force restore requested but no archives available - this is normal for first run"
  log_info "Initializing fresh environment for first-time startup"
  # Create necessary directory structure for first run
  local archive_paths_array path
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -n "$path" ]] && [[ ! -e "$path" ]]; then
      log_info "Creating directory for first run: $path"
      mkdir -p "$path" || log_warn "Failed to create directory: $path"
    fi
  done
  log_info "✓ First-time environment initialization completed"
}

# Restore specified archive with integrity verification.
# Arguments: $1 - archive name or "latest" (default)
#            $2 - force_restore: "true" initializes dirs when no archive exists
# Returns: 0 on success or acceptable first-run, 1 on failure.
restore_archive() {
  local archive_name="${1:-latest}"
  local force_restore="${2:-false}"
  log_info "Starting synchronous archive restoration: $archive_name"
  # If latest, get the latest archive name first
  if [[ "$archive_name" == "latest" ]]; then
    local archive_list_output
    if archive_list_output=$(list_archives 2>&1); then
      # `|| true`: no LATEST_BACKUP line means an empty name, and a failed
      # grep must not abort the script under set -e/pipefail.
      archive_name=$(echo "$archive_list_output" | grep "LATEST_BACKUP:" | cut -d: -f2 || true)
      if [[ -z "$archive_name" ]]; then
        log_info "No archive files found, this appears to be the first run"
        if [[ "$force_restore" == "true" ]]; then
          initialize_first_run_paths
          return 0
        else
          log_info "Continuing with fresh start (no archives available)"
          return 0
        fi
      fi
    else
      # Check if output contains "No archive files found"
      if echo "$archive_list_output" | grep -q "No archive files found"; then
        log_info "No archive files found, this appears to be the first run"
        if [[ "$force_restore" == "true" ]]; then
          initialize_first_run_paths
          return 0
        else
          log_info "Continuing with fresh start (no archives available)"
          return 0
        fi
      else
        log_error "Failed to get archive list: $archive_list_output"
        return 1
      fi
    fi
  fi

  log_info "Restoring archive file: $archive_name"
  # Call embedded Python handler
  if run_download_handler "$archive_name" "$DATASET_ID" "$RESTORE_PATH" "$HF_TOKEN"; then
    log_info "Archive extraction completed, verifying data integrity..."
    # Verify data integrity after restoration
    if verify_data_integrity; then
      log_info "✓ Archive restoration completed successfully with integrity verification"
      return 0
    else
      log_error "✗ Data integrity verification failed after restoration"
      return 1
    fi
  else
    log_error "✗ Archive restoration failed during extraction"
    return 1
  fi
}

# Main program entry: parse arguments, load config, dispatch on command.
main() {
  local command="start"
  local config_file="$DEFAULT_CONFIG_FILE"
  local verbose=false
  local no_restore=false
  local no_sync=false
  local restore_target=""

  # Parse command line arguments
  while [[ $# -gt 0 ]]; do
    case $1 in
      -c|--config)
        config_file="$2"
        shift 2
        ;;
      -v|--verbose)
        verbose=true
        shift
        ;;
      --no-restore)
        no_restore=true
        shift
        ;;
      --no-sync)
        no_sync=true
        shift
        ;;
      archive|restore|restore-sync|list|daemon|start)
        command="$1"
        shift
        ;;
      *)
        if [[ ("$command" == "restore" || "$command" == "restore-sync") && -z "$restore_target" ]]; then
          # Archive name parameter for restore and restore-sync commands
          restore_target="$1"
          shift
        else
          log_error "Unknown parameter: $1"
          exit 1
        fi
        ;;
    esac
  done

  # Load configuration
  load_configuration "$config_file"
  set_default_configuration

  # Set log level
  if [[ "$verbose" == "true" ]]; then
    export LOG_LEVEL="DEBUG"
  fi

  log_info "=== Data Persistence Single File Script Startup ==="
  log_info "Version: 4.0"
  log_info "Command: $command"
  log_info "Configuration file: $config_file"

  # Execute corresponding operation based on command
  case $command in
    archive)
      if validate_configuration; then
        perform_archive
      else
        exit 1
      fi
      ;;
    restore)
      if validate_configuration; then
        restore_archive "${restore_target:-latest}"
      else
        exit 1
      fi
      ;;
    restore-sync)
      # Synchronous restore with mandatory integrity verification
      if validate_configuration; then
        log_info "=== SYNCHRONOUS RESTORE MODE ==="
        log_info "This is a blocking operation that must complete successfully"
        if restore_archive "${restore_target:-latest}" "true"; then
          log_info "✓ Synchronous restore completed successfully"
          exit 0
        else
          log_error "✗ Synchronous restore failed"
          log_error "Operation aborted to prevent data inconsistency"
          exit 1
        fi
      else
        log_error "Configuration validation failed"
        exit 1
      fi
      ;;
    list)
      if validate_configuration; then
        list_archives
      else
        exit 1
      fi
      ;;
    daemon)
      if validate_configuration; then
        sync_daemon
      else
        exit 1
      fi
      ;;
    start)
      # Application startup mode with synchronous data restoration
      if validate_configuration; then
        # Synchronous auto restore - behavior depends on FORCE_SYNC_RESTORE
        if [[ "$ENABLE_AUTO_RESTORE" == "true" && "$no_restore" == "false" ]]; then
          log_info "=== SYNCHRONOUS DATA RESTORATION PHASE ==="
          log_info "Performing synchronous auto restore - this is a blocking operation"
          log_info "Force sync restore: $FORCE_SYNC_RESTORE"
          log_info "Integrity check: $ENABLE_INTEGRITY_CHECK"
          if restore_archive "latest"; then
            log_info "✓ Synchronous data restoration completed successfully"
            log_info "All dependent services can now start safely"
          else
            if [[ "$FORCE_SYNC_RESTORE" == "true" ]]; then
              log_error "✗ Synchronous data restoration failed"
              log_error "FORCE_SYNC_RESTORE=true: Service startup will be aborted to prevent data inconsistency"
              log_error "Please check the logs and fix any issues before restarting"
              exit 1
            else
              log_warn "✗ Synchronous data restoration failed"
              log_warn "FORCE_SYNC_RESTORE=false: Continuing with service startup (legacy behavior)"
              log_warn "This may result in data inconsistency"
            fi
          fi
          log_info "=== DATA RESTORATION PHASE COMPLETED ==="
        else
          log_info "Auto restore is disabled, skipping data restoration"
        fi
        # Start sync daemon only after successful restoration
        if [[ "$ENABLE_AUTO_SYNC" == "true" && "$no_sync" == "false" ]]; then
          log_info "Starting sync daemon (data restoration completed)"
          sync_daemon &
          sync_pid=$!
          log_info "Sync daemon PID: $sync_pid"
        fi
      else
        log_warn "Configuration validation failed, starting application in non-persistent mode"
        log_warn "No data restoration will be performed"
      fi
      # Start main application only after all restoration is complete
      log_info "=== STARTING MAIN APPLICATION ==="
      log_info "All data restoration and verification completed"
      log_info "Starting main application: $APP_COMMAND"
      # shellcheck disable=SC2086 — APP_COMMAND is intentionally word-split
      # into a command plus its arguments.
      exec $APP_COMMAND
      ;;
    *)
      log_error "Unknown command: $command"
      exit 1
      ;;
  esac
}

# Script entry point (only when executed, not when sourced)
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
  main "$@"
fi