# 52c75d7a / scripts/utils/persistence.sh
# autoface
# Updated the log directory structure to change all log paths from `/home/user/logs` to `/home/user/log` to simplify file management. Also updated related scripts and configuration files to reflect this change.
# 5b0a02b
#!/bin/bash
# Data persistence helper: archives configured paths to a Hugging Face
# dataset and restores them on startup. Requires HF_TOKEN and DATASET_ID.
set -euo pipefail # Strict error handling
# Set timezone to UTC for consistent timestamp handling across environments
export TZ=UTC
# Default configuration file path; the CONFIG_FILE env var overrides it
DEFAULT_CONFIG_FILE="${CONFIG_FILE:-/home/user/config/persistence.conf}"
# Logging functions
# Append a timestamped, level-tagged message to the log file and echo it
# to stdout via tee.
# Arguments: $1 - level tag (INFO/WARN/ERROR); remaining args - message text
# Globals:   LOG_FILE (read; falls back to /home/user/log/persistence.log)
log() {
  local level="$1"
  shift
  # Resolve the target once so the fallback path is defined in one place
  # instead of being duplicated in two expansions
  local log_file="${LOG_FILE:-/home/user/log/persistence.log}"
  # Ensure log directory exists before tee tries to append
  mkdir -p "$(dirname "$log_file")"
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] [$level] $*" | tee -a "$log_file"
}
# Convenience wrappers that tag messages with a fixed severity level.
log_info() {
  log "INFO" "$@"
}
log_warn() {
  log "WARN" "$@"
}
log_error() {
  log "ERROR" "$@"
}
# Load configuration file
# Source a shell-variable-format configuration file if present.
# Arguments: $1 - config file path (defaults to DEFAULT_CONFIG_FILE)
# Returns:   0 when the file is missing; otherwise the status of `source`
load_configuration() {
  local config_file="${1:-$DEFAULT_CONFIG_FILE}"
  if [[ -f "$config_file" ]]; then
    log_info "Loading configuration file: $config_file"
    # shellcheck disable=SC1090 — path is only known at runtime
    source "$config_file"
  else
    log_warn "Configuration file does not exist: $config_file, using default configuration"
    return 0
  fi
}
# Set default configuration
# Populate configuration defaults for any variable not supplied by the
# environment or the sourced config file. Only defaults that are established
# elsewhere in this script (or by the original comments) are filled in;
# values with no documented default stay empty and are validated later.
set_default_configuration() {
  # Core configuration (no safe defaults — checked in validate_configuration)
  export HF_TOKEN="${HF_TOKEN:-}"
  export DATASET_ID="${DATASET_ID:-}"
  export ARCHIVE_PATHS="${ARCHIVE_PATHS:-}"
  export RESTORE_PATH="${RESTORE_PATH:-./}"
  # Sync configuration
  # An empty SYNC_INTERVAL would make `sleep ""` fail inside sync_daemon,
  # so the documented 2-hour default is applied here
  export SYNC_INTERVAL="${SYNC_INTERVAL:-7200}" # 2 hours
  export MAX_ARCHIVES="${MAX_ARCHIVES:-}" # NOTE(review): no documented default — confirm
  export COMPRESSION_LEVEL="${COMPRESSION_LEVEL:-}" # NOTE(review): not referenced by this script
  # 5 minutes — matches the ${INITIAL_BACKUP_DELAY:-300} fallback in sync_daemon
  export INITIAL_BACKUP_DELAY="${INITIAL_BACKUP_DELAY:-300}"
  # File configuration
  export ARCHIVE_PREFIX="${ARCHIVE_PREFIX:-}"
  export ARCHIVE_EXTENSION="${ARCHIVE_EXTENSION:-}"
  export EXCLUDE_PATTERNS="${EXCLUDE_PATTERNS:-}"
  # Application configuration
  export APP_COMMAND="${APP_COMMAND:-}"
  export ENABLE_AUTO_RESTORE="${ENABLE_AUTO_RESTORE:-}"
  export ENABLE_AUTO_SYNC="${ENABLE_AUTO_SYNC:-}"
  # Synchronous restore configuration
  export FORCE_SYNC_RESTORE="${FORCE_SYNC_RESTORE:-}"
  export ENABLE_INTEGRITY_CHECK="${ENABLE_INTEGRITY_CHECK:-}"
  # Logging configuration — same fallback path the log() function uses
  export LOG_FILE="${LOG_FILE:-/home/user/log/persistence.log}"
  export LOG_LEVEL="${LOG_LEVEL:-}"
}
# Validate required environment variables
# Validate required environment variables and set up HF authentication.
# Globals:  HF_TOKEN, DATASET_ID (read); HUGGING_FACE_HUB_TOKEN (written)
# Returns:  0 when configuration is complete, 1 otherwise
validate_configuration() {
  local errors=0
  if [[ -z "$HF_TOKEN" ]]; then
    log_error "Missing required environment variable: HF_TOKEN"
    # Plain assignment instead of ((errors++)): the post-increment form
    # returns status 1 when the old value is 0, which would trip `set -e`
    # if this function is ever called outside an if-condition
    errors=$((errors + 1))
  fi
  if [[ -z "$DATASET_ID" ]]; then
    log_error "Missing required environment variable: DATASET_ID"
    errors=$((errors + 1))
  fi
  if [[ $errors -gt 0 ]]; then
    log_error "Configuration validation failed, starting application in non-persistent mode"
    return 1
  fi
  # Set Hugging Face authentication
  export HUGGING_FACE_HUB_TOKEN="$HF_TOKEN"
  log_info "Configuration validation successful"
  return 0
}
# Create archive file
# Create a compressed tar archive of all existing ARCHIVE_PATHS entries.
# Globals:  ARCHIVE_PREFIX, ARCHIVE_EXTENSION, ARCHIVE_PATHS,
#           EXCLUDE_PATTERNS (read)
# Outputs:  the archive path on stdout (all logging goes to stderr)
# Returns:  0 on success (tar exit 0/1 both accepted), 1 on failure
create_archive() {
  local timestamp
  timestamp=$(date +%Y%m%d_%H%M%S)
  local archive_file="${ARCHIVE_PREFIX}_${timestamp}.${ARCHIVE_EXTENSION}"
  # Use user-owned directory instead of /tmp
  local temp_dir="/home/user/temp"
  mkdir -p "$temp_dir"
  local temp_archive="$temp_dir/${archive_file}"
  log_info "Starting archive creation: $archive_file" >&2
  # Build the tar invocation as an argument ARRAY. The original built one big
  # string and ran it through `eval`, which breaks (and is injectable) as soon
  # as a path or exclude pattern contains a quote or special character.
  # Only exclude files that would negatively impact HuggingFace datasets backup.
  local default_excludes="__pycache__,*.tmp,*/temp,*/cache,*/.cache,*/log,*/logs"
  local combined_patterns="${EXCLUDE_PATTERNS:-},${default_excludes}"
  # tar options: tolerate files changing/disappearing during the run and pin
  # stored mtimes to "now" in UTC for consistent timestamps
  local tar_args=(
    -czf "$temp_archive"
    --ignore-failed-read
    --warning=no-file-changed
    --warning=no-file-removed
    --mtime="@$(date +%s)"
  )
  local patterns pattern
  IFS=',' read -ra patterns <<< "$combined_patterns"
  for pattern in "${patterns[@]}"; do
    pattern="${pattern// /}" # Remove spaces
    if [[ -n "$pattern" ]]; then
      tar_args+=(--exclude="$pattern")
    fi
  done
  # Collect the archive paths that actually exist; skip the rest with a warning
  local archive_paths_array path
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  local valid_paths=()
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -e "$path" ]]; then
      # Drop a placeholder into empty directories so tar preserves them
      if [[ -d "$path" ]] && [[ -z "$(ls -A "$path" 2>/dev/null)" ]]; then
        log_warn "Directory is empty, creating placeholder file: $path" >&2
        echo "# Placeholder file for persistence backup" > "$path/.persistence_placeholder"
      fi
      valid_paths+=("$path")
    else
      log_warn "Archive path does not exist, skipping: $path" >&2
    fi
  done
  # Check if there are valid paths
  if [[ ${#valid_paths[@]} -eq 0 ]]; then
    log_error "No valid archive paths found" >&2
    return 1
  fi
  log_info "Executing archive command: tar $(printf '%q ' "${tar_args[@]}" "${valid_paths[@]}")" >&2
  # Run tar directly (no eval) and capture its exit code without aborting
  # under `set -e`
  local tar_exit_code=0
  tar "${tar_args[@]}" "${valid_paths[@]}" >&2 || tar_exit_code=$?
  # tar exit code 1 means some files changed during archiving, which is
  # normal for a live application — accept 0 and 1
  if [[ $tar_exit_code -eq 0 || $tar_exit_code -eq 1 ]] && [[ -f "$temp_archive" ]]; then
    log_info "Archive file created successfully: $temp_archive" >&2
    if [[ $tar_exit_code -eq 1 ]]; then
      log_warn "Some files changed during archiving (this is normal for active applications)" >&2
    fi
    echo "$temp_archive"
    return 0
  else
    log_error "Archive file creation failed with exit code: $tar_exit_code" >&2
    return 1
  fi
}
# Call Python upload handler with pre-upload cleanup
# Delegate the upload (with pre-upload retention cleanup) to the companion
# Python module located next to this script.
# Positional arguments: archive path, target filename, dataset id,
# archive prefix, archive extension, retention count, HF token.
run_upload_handler() {
  local archive_path="$1" remote_name="$2" dataset="$3"
  local prefix="$4" extension="$5" retention="$6" auth_token="$7"
  # Resolve this script's directory so hf_persistence.py is found regardless
  # of the caller's working directory
  local here
  here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  python3 "${here}/hf_persistence.py" upload \
    --token "$auth_token" \
    --dataset-id "$dataset" \
    --archive-file "$archive_path" \
    --filename "$remote_name" \
    --archive-prefix "$prefix" \
    --archive-extension "$extension" \
    --max-archives "$retention"
}
# Upload archive to Hugging Face
# Upload one archive file to the Hugging Face dataset.
# Arguments: $1 - local archive path
# Returns:   0 on success, 1 when the Python handler fails
upload_archive() {
  local archive_file="$1"
  local filename
  filename=$(basename "$archive_file")
  log_info "Starting archive upload: $filename"
  # Guard-clause form: bail out on failure, fall through on success
  if ! run_upload_handler "$archive_file" "$filename" "$DATASET_ID" "$ARCHIVE_PREFIX" "$ARCHIVE_EXTENSION" "$MAX_ARCHIVES" "$HF_TOKEN"; then
    log_error "Archive upload failed"
    return 1
  fi
  log_info "Archive upload completed"
  return 0
}
# Perform one archive operation
# Run one archive cycle: create the archive, then upload it (or keep it on
# disk when running in test mode with HF_TOKEN=test_token).
# Returns: 1 when archive creation fails; 0 otherwise (upload failures are
# logged but deliberately non-fatal so the sync daemon keeps running).
perform_archive() {
  log_info "Starting archive operation"
  local archive_file
  if ! archive_file=$(create_archive); then
    log_error "Archive creation failed"
    return 1
  fi
  # Test mode: keep the archive on disk and skip the upload entirely
  if [[ "$HF_TOKEN" == "test_token" ]]; then
    log_info "Test mode: Archive created successfully, skipping upload"
    log_info "Archive file: $archive_file"
    ls -la "$archive_file"
    log_info "Test mode: Keeping archive file for inspection"
    return 0
  fi
  if upload_archive "$archive_file"; then
    log_info "Archive operation completed successfully"
  else
    # Best-effort: a failed upload is logged, not propagated
    log_error "Archive upload failed"
  fi
  # Clean up temporary files
  rm -f "$archive_file"
}
# Sync daemon
# Background loop: wait INITIAL_BACKUP_DELAY once, then run perform_archive
# every SYNC_INTERVAL seconds until the process is killed.
# Globals: SYNC_INTERVAL, INITIAL_BACKUP_DELAY (read)
sync_daemon() {
log_info "Starting sync daemon, interval: ${SYNC_INTERVAL} seconds"
# Initial delay to allow application to fully start
local initial_delay="${INITIAL_BACKUP_DELAY:-300}"
log_info "Waiting ${initial_delay} seconds for application to fully initialize before first backup"
sleep "$initial_delay"
# NOTE(review): under `set -e` a non-zero return from perform_archive
# (archive creation failure) terminates this loop — confirm that is intended
while true; do
perform_archive
log_info "Next sync will execute in ${SYNC_INTERVAL} seconds"
sleep "$SYNC_INTERVAL"
done
}
# Call Python archive lister
# List available archives by delegating to the companion Python module.
# Positional arguments: dataset id, archive prefix, archive extension, token.
run_archive_lister() {
  local dataset="$1" prefix="$2" extension="$3" auth_token="$4"
  # Locate the Python helper relative to this script, not the caller's cwd
  local here
  here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  python3 "${here}/hf_persistence.py" list \
    --token "$auth_token" \
    --dataset-id "$dataset" \
    --archive-prefix "$prefix" \
    --archive-extension "$extension"
}
# List available archives
# Print the list of archives available in the remote dataset.
# In test mode (HF_TOKEN=test_token) an empty list is simulated.
list_archives() {
  log_info "Getting available archive list"
  # Test-mode short circuit: pretend the dataset holds no archives
  if [[ "$HF_TOKEN" == "test_token" ]]; then
    log_info "Test mode: Simulating empty archive list"
    echo "No archive files found"
    return 0
  fi
  run_archive_lister "$DATASET_ID" "$ARCHIVE_PREFIX" "$ARCHIVE_EXTENSION" "$HF_TOKEN"
}
# Call Python download handler
# Download and extract one archive via the companion Python module.
# Positional arguments: archive name, dataset id, restore path, token.
run_download_handler() {
  local archive="$1" dataset="$2" destination="$3" auth_token="$4"
  # Locate the Python helper relative to this script
  local here
  here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
  python3 "${here}/hf_persistence.py" restore \
    --token "$auth_token" \
    --dataset-id "$dataset" \
    --archive-name "$archive" \
    --restore-path "$destination"
}
# Verify data integrity after restoration
# Verify that the restored ARCHIVE_PATHS entries exist and, for directories,
# are readable and writable.
# Globals:  ENABLE_INTEGRITY_CHECK, ARCHIVE_PATHS (read)
# Returns:  0 when verification passes or is disabled, 1 on failure
verify_data_integrity() {
  # Check if integrity verification is enabled
  if [[ "$ENABLE_INTEGRITY_CHECK" != "true" ]]; then
    log_info "Data integrity verification is disabled, skipping"
    return 0
  fi
  local verification_failed=0
  local archive_paths_array path
  log_info "Starting data integrity verification"
  # Check if critical directories exist and are accessible
  IFS=',' read -ra archive_paths_array <<< "$ARCHIVE_PATHS"
  for path in "${archive_paths_array[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -n "$path" ]]; then
      if [[ -e "$path" ]]; then
        log_info "βœ“ Verified path exists: $path"
        # Check if directory is readable
        if [[ -d "$path" ]] && [[ ! -r "$path" ]]; then
          log_error "βœ— Directory exists but is not readable: $path"
          # Plain assignment instead of ((...++)), which returns status 1
          # on its first use and would trip `set -e`
          verification_failed=$((verification_failed + 1))
        fi
        # Check if directory is writable (for future operations)
        if [[ -d "$path" ]] && [[ ! -w "$path" ]]; then
          log_error "βœ— Directory exists but is not writable: $path"
          verification_failed=$((verification_failed + 1))
        fi
      else
        # Missing paths are only a warning — acceptable on a first run
        log_warn "⚠ Path does not exist after restoration: $path"
      fi
    fi
  done
  # Additional integrity checks (e.g. specific config files) can be added here
  if [[ $verification_failed -gt 0 ]]; then
    log_error "Data integrity verification failed with $verification_failed errors"
    return 1
  else
    log_info "βœ“ Data integrity verification passed"
    return 0
  fi
}
# Restore specified archive with integrity verification
# Create any missing ARCHIVE_PATHS directories for a first-time run.
# Globals: ARCHIVE_PATHS (read)
_initialize_first_run_paths() {
  local paths path
  IFS=',' read -ra paths <<< "$ARCHIVE_PATHS"
  for path in "${paths[@]}"; do
    path="${path// /}" # Remove spaces
    if [[ -n "$path" ]] && [[ ! -e "$path" ]]; then
      log_info "Creating directory for first run: $path"
      mkdir -p "$path" || log_warn "Failed to create directory: $path"
    fi
  done
}

# Shared "no archives yet" handling (previously duplicated verbatim in two
# branches of restore_archive).
# Arguments: $1 - force_restore flag ("true"/"false")
_handle_no_archives() {
  local force_restore="$1"
  log_info "No archive files found, this appears to be the first run"
  if [[ "$force_restore" == "true" ]]; then
    log_info "Force restore requested but no archives available - this is normal for first run"
    log_info "Initializing fresh environment for first-time startup"
    _initialize_first_run_paths
    log_info "βœ“ First-time environment initialization completed"
  else
    log_info "Continuing with fresh start (no archives available)"
  fi
  return 0
}

# Restore the named archive (or the latest one) and verify data integrity.
# Arguments: $1 - archive name or "latest" (default); $2 - force flag
# Returns:   0 on success or clean first run, 1 on failure
restore_archive() {
  local archive_name="${1:-latest}"
  local force_restore="${2:-false}"
  log_info "Starting synchronous archive restoration: $archive_name"
  # If latest, resolve the newest archive name first
  if [[ "$archive_name" == "latest" ]]; then
    local archive_list_output
    if archive_list_output=$(list_archives 2>&1); then
      # `|| true`: with set -euo pipefail, a non-matching grep would otherwise
      # abort the whole script when this function is called outside an `if`
      # (the plain `restore` command path)
      archive_name=$(echo "$archive_list_output" | grep "LATEST_BACKUP:" | cut -d: -f2) || true
      if [[ -z "$archive_name" ]]; then
        _handle_no_archives "$force_restore"
        return 0
      fi
    else
      # Listing failed — distinguish "empty dataset" from a real error
      if echo "$archive_list_output" | grep -q "No archive files found"; then
        _handle_no_archives "$force_restore"
        return 0
      fi
      log_error "Failed to get archive list: $archive_list_output"
      return 1
    fi
  fi
  log_info "Restoring archive file: $archive_name"
  if run_download_handler "$archive_name" "$DATASET_ID" "$RESTORE_PATH" "$HF_TOKEN"; then
    log_info "Archive extraction completed, verifying data integrity..."
    # Verify data integrity after restoration
    if verify_data_integrity; then
      log_info "βœ“ Archive restoration completed successfully with integrity verification"
      return 0
    else
      log_error "βœ— Data integrity verification failed after restoration"
      return 1
    fi
  else
    log_error "βœ— Archive restoration failed during extraction"
    return 1
  fi
}
# Main program entry
# Parse command-line arguments, load configuration, then dispatch to the
# requested command (archive / restore / restore-sync / list / daemon / start).
# The `start` command ends in `exec $APP_COMMAND` and never returns.
main() {
local command="start"
local config_file="$DEFAULT_CONFIG_FILE"
local verbose=false
local no_restore=false
local no_sync=false
local restore_target=""
# Parse command line arguments
while [[ $# -gt 0 ]]; do
case $1 in
-c|--config)
config_file="$2"
shift 2
;;
-v|--verbose)
verbose=true
shift
;;
--no-restore)
no_restore=true
shift
;;
--no-sync)
no_sync=true
shift
;;
archive|restore|restore-sync|list|daemon|start)
command="$1"
shift
;;
*)
# A bare word after restore/restore-sync is the archive name to restore
if [[ ("$command" == "restore" || "$command" == "restore-sync") && -z "$restore_target" ]]; then
# Archive name parameter for restore and restore-sync commands
restore_target="$1"
shift
else
log_error "Unknown parameter: $1"
exit 1
fi
;;
esac
done
# Load configuration file first, then fill in defaults for anything unset
load_configuration "$config_file"
set_default_configuration
# Set log level
if [[ "$verbose" == "true" ]]; then
export LOG_LEVEL="DEBUG"
fi
log_info "=== Data Persistence Single File Script Startup ==="
log_info "Version: 4.0"
log_info "Command: $command"
log_info "Configuration file: $config_file"
# Execute corresponding operation based on command
case $command in
archive)
if validate_configuration; then
perform_archive
else
exit 1
fi
;;
restore)
if validate_configuration; then
restore_archive "${restore_target:-latest}"
else
exit 1
fi
;;
restore-sync)
# Synchronous restore with mandatory integrity verification
if validate_configuration; then
log_info "=== SYNCHRONOUS RESTORE MODE ==="
log_info "This is a blocking operation that must complete successfully"
if restore_archive "${restore_target:-latest}" "true"; then
log_info "βœ“ Synchronous restore completed successfully"
exit 0
else
log_error "βœ— Synchronous restore failed"
log_error "Operation aborted to prevent data inconsistency"
exit 1
fi
else
log_error "Configuration validation failed"
exit 1
fi
;;
list)
if validate_configuration; then
list_archives
else
exit 1
fi
;;
daemon)
if validate_configuration; then
sync_daemon
else
exit 1
fi
;;
start)
# Application startup mode with synchronous data restoration
if validate_configuration; then
# Synchronous auto restore - behavior depends on FORCE_SYNC_RESTORE
if [[ "$ENABLE_AUTO_RESTORE" == "true" && "$no_restore" == "false" ]]; then
log_info "=== SYNCHRONOUS DATA RESTORATION PHASE ==="
log_info "Performing synchronous auto restore - this is a blocking operation"
log_info "Force sync restore: $FORCE_SYNC_RESTORE"
log_info "Integrity check: $ENABLE_INTEGRITY_CHECK"
if restore_archive "latest"; then
log_info "βœ“ Synchronous data restoration completed successfully"
log_info "All dependent services can now start safely"
else
# FORCE_SYNC_RESTORE=true makes restore failure fatal; otherwise the
# app still starts (legacy best-effort behavior)
if [[ "$FORCE_SYNC_RESTORE" == "true" ]]; then
log_error "βœ— Synchronous data restoration failed"
log_error "FORCE_SYNC_RESTORE=true: Service startup will be aborted to prevent data inconsistency"
log_error "Please check the logs and fix any issues before restarting"
exit 1
else
log_warn "βœ— Synchronous data restoration failed"
log_warn "FORCE_SYNC_RESTORE=false: Continuing with service startup (legacy behavior)"
log_warn "This may result in data inconsistency"
fi
fi
log_info "=== DATA RESTORATION PHASE COMPLETED ==="
else
log_info "Auto restore is disabled, skipping data restoration"
fi
# Start sync daemon only after successful restoration
if [[ "$ENABLE_AUTO_SYNC" == "true" && "$no_sync" == "false" ]]; then
log_info "Starting sync daemon (data restoration completed)"
sync_daemon &
sync_pid=$!
log_info "Sync daemon PID: $sync_pid"
fi
else
log_warn "Configuration validation failed, starting application in non-persistent mode"
log_warn "No data restoration will be performed"
fi
# Start main application only after all restoration is complete
log_info "=== STARTING MAIN APPLICATION ==="
log_info "All data restoration and verification completed"
log_info "Starting main application: $APP_COMMAND"
# Replace this shell with the application process; APP_COMMAND is
# intentionally unquoted so it word-splits into command + arguments
exec $APP_COMMAND
;;
*)
log_error "Unknown command: $command"
exit 1
;;
esac
}
# Script entry point
# Run main only when executed directly; sourcing the file just loads functions
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi