File size: 11,771 Bytes

cfcbbc8

#!/bin/bash
#
# run_smk_sequential.sh - Run Snakemake workflows one at a time for debugging
#
# This script runs each Snakemake workflow sequentially to observe
# the behavior of prompt scripts, supervisor, and coder in real time.
#
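# Usage:
#   ./run_smk_sequential.sh                    # Run all steps
#   ./run_smk_sequential.sh --step1            # Run summarize_root (both rules)
#   ./run_smk_sequential.sh --step2            # Run create_numpy
#   ./run_smk_sequential.sh --step3            # Run preprocess
#   ./run_smk_sequential.sh --step4            # Run scores
#   ./run_smk_sequential.sh --step5            # Run categorization
#   ./run_smk_sequential.sh --step1 --step3    # Run summarize_root + preprocess
#
# Additional options:
#   --validate       Run check_soln.py after each successful step
#   --out-dir DIR    Output directory (default: results)
#   --job-id ID      Use output directory results_job_ID
#   --auto-dir       Use output directory results_YYYYMMDD_HHMMSS
#   --config FILE    Config file (default: config.yml)
#   --help, -h       Show the help message
#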

# Pull in API keys when the user keeps them in ~/.apikeys.sh; skip otherwise.
if [[ -f ~/.apikeys.sh ]]; then
    . ~/.apikeys.sh
fi

# Defaults for the command-line options.
# Step selection: every workflow starts out deselected.
RUN_STEP1=false RUN_STEP2=false RUN_STEP3=false RUN_STEP4=false RUN_STEP5=false
# Per-step validation is off unless requested.
VALIDATE_STEPS=false
# Where results are written and which config file is used.
OUTPUT_DIR="results"
CONFIG="config.yml"

# Directory the script was started from; relative paths are anchored here.
PROJECT_ROOT=$(pwd)


#######################################
# Print the help text to stdout.
# Arguments: none
#######################################
print_usage() {
    cat <<EOF
Usage: $0 [OPTIONS]

Run Snakemake workflows for ATLAS analysis

Options:
  --step1    Run summarize_root workflow (both rules: data generation + prompt processing)
  --step2    Run create_numpy workflow
  --step3    Run preprocess workflow
  --step4    Run scores workflow
  --step5    Run categorization workflow
  --validate    Run validation after each successful step
  --out-dir DIR    Custom output directory (default: results)
  --job-id ID    Create unique directory: results_job_ID
  --auto-dir    Create unique directory with timestamp: results_YYYYMMDD_HHMMSS
  --config FILE    Config file to use (default: config.yml)
  --help     Show this help message

Examples:
  $0 --step1 --auto-dir              # results_20250916_143052/
  $0 --step1 --job-id 12345          # results_job_12345/
  $0 --step1 --out-dir my_run_1      # my_run_1/

If no options are provided, all steps are run sequentially.
EOF
}

#######################################
# Abort unless an option that takes a value was actually given one.
# Arguments:
#   $1 - option name (for the error message)
#   $2 - number of arguments left, counting the option itself
#   $3 - the candidate value (may be empty)
# Outputs:   error message on stderr
# Returns:   exits the script with status 1 on a missing/empty value
#######################################
require_value() {
    if (( $2 < 2 )) || [[ -z "$3" ]]; then
        echo "Option $1 requires a non-empty argument" >&2
        echo "Use --help for usage information" >&2
        exit 1
    fi
}

#######################################
# Parse the command line into the option globals.
# Globals:
#   RUN_STEP1..RUN_STEP5, VALIDATE_STEPS, OUTPUT_DIR, CONFIG, TIMESTAMP (written)
# Arguments: the script's command-line arguments
# Returns:   exits 0 after --help, exits 1 on an unknown option or a
#            missing option value
#######################################
parse_args() {
    while (( $# > 0 )); do
        case "$1" in
            --step1)
                RUN_STEP1=true
                shift
                ;;
            --step2)
                RUN_STEP2=true
                shift
                ;;
            --step3)
                RUN_STEP3=true
                shift
                ;;
            --step4)
                RUN_STEP4=true
                shift
                ;;
            --step5)
                RUN_STEP5=true
                shift
                ;;
            --validate)
                VALIDATE_STEPS=true
                shift
                ;;
            --out-dir)
                # Without this check a trailing --out-dir left OUTPUT_DIR empty,
                # which makes later paths resolve against the filesystem root.
                require_value "$1" "$#" "${2-}"
                OUTPUT_DIR="$2"
                shift 2
                ;;
            --job-id)
                # Create unique directory based on job ID
                require_value "$1" "$#" "${2-}"
                OUTPUT_DIR="results_job_$2"
                shift 2
                ;;
            --auto-dir)
                # Create unique directory with timestamp
                TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
                OUTPUT_DIR="results_${TIMESTAMP}"
                shift
                ;;
            --config)
                require_value "$1" "$#" "${2-}"
                CONFIG="$2"
                shift 2
                ;;
            --help|-h)
                print_usage
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                echo "Use --help for usage information" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Keep an explicit step selection as-is; when every step flag is still "false",
# switch all of them on so the whole pipeline runs.
if [[ "$RUN_STEP1" != "false" || "$RUN_STEP2" != "false" || "$RUN_STEP3" != "false" || "$RUN_STEP4" != "false" || "$RUN_STEP5" != "false" ]]; then
    printf '%s\n' "=== Running Selected Snakemake Workflows (Output: ${OUTPUT_DIR}) ==="
else
    RUN_STEP1=true
    RUN_STEP2=true
    RUN_STEP3=true
    RUN_STEP4=true
    RUN_STEP5=true
    printf '%s\n' "=== Running All Snakemake Workflows Sequentially (Output: ${OUTPUT_DIR}) ==="
fi
printf '\n'

# Set up environment
module load python
conda activate llm_env

# Turn the config location into an absolute path so Snakemake can always find
# it: absolute paths are kept, relative ones are anchored at PROJECT_ROOT.
case "${CONFIG}" in
    /*) CONFIG_PATH="${CONFIG}" ;;
    *)  CONFIG_PATH="${PROJECT_ROOT}/${CONFIG}" ;;
esac

# Stop before doing any work when the config file is missing.
if ! [[ -f "${CONFIG_PATH}" ]]; then
    echo "❌ Config file not found at ${CONFIG_PATH}"
    exit 1
fi

# Copy and prepare workflow files

# Drop one trailing slash, then refuse an empty value: every path below is
# built from OUTPUT_DIR, so an empty one would point at the filesystem root.
OUTPUT_DIR="${OUTPUT_DIR%/}"
: "${OUTPUT_DIR:?output directory must not be empty}"

# BASE_DIR is the absolute form of OUTPUT_DIR that gets written into the
# copied prompt and workflow files.
if [[ "${OUTPUT_DIR}" = /* ]]; then
    BASE_DIR="${OUTPUT_DIR}"
else
    BASE_DIR="$PWD/${OUTPUT_DIR}"
fi

#######################################
# Escape a string for literal use as the replacement part of `s#...#...#`.
# Backslash and '&' are special in a sed replacement, '#' is the delimiter
# used below.
# Arguments: $1 - text to escape
# Outputs:   escaped text on stdout
#######################################
sed_escape_replacement() {
    printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}

BASE_DIR_SED=$(sed_escape_replacement "${BASE_DIR}")
CONFIG_PATH_SED=$(sed_escape_replacement "${CONFIG_PATH}")

echo "Preparing workflow files..."
mkdir -p "${OUTPUT_DIR}/prompts_temp"
cp -r prompts/* "${OUTPUT_DIR}/prompts_temp/"
# Fill the {BASE_DIR} placeholder in the copied prompt files
sed -i "s#{BASE_DIR}#${BASE_DIR_SED}#g" "${OUTPUT_DIR}"/prompts_temp/*.txt

# Each workflow is copied to <name>_temp.smk so the originals stay untouched
for workflow in summarize_root create_numpy preprocess scores categorization; do
    cp "workflow/${workflow}.smk" "${OUTPUT_DIR}/${workflow}_temp.smk"
done
cp supervisor_coder.py "${OUTPUT_DIR}/supervisor_coder.py"
cp write_prompt.py "${OUTPUT_DIR}/write_prompt.py"
cp check_soln.py "${OUTPUT_DIR}/check_soln.py"

sed -i "s#{BASE_DIR}#${BASE_DIR_SED}#g" "${OUTPUT_DIR}"/*_temp.smk
# Replace {CONFIG} in temp snakemake files with the absolute path to the project's config
sed -i "s#{CONFIG}#${CONFIG_PATH_SED}#g" "${OUTPUT_DIR}"/*_temp.smk

# Copy solutions for validation
echo "Copying reference solution arrays for validation..."
mkdir -p "${OUTPUT_DIR}/solution/arrays"
# Remove any existing files first to avoid permission issues.
# ${OUTPUT_DIR:?} aborts instead of expanding to /solution/arrays/*.
rm -f -- "${OUTPUT_DIR:?}"/solution/arrays/*
cp solution/arrays/* "${OUTPUT_DIR}/solution/arrays/"

# Create output directory
mkdir -p "${OUTPUT_DIR}/generated_code"
mkdir -p "${OUTPUT_DIR}/logs"
cp utils.py "${OUTPUT_DIR}/generated_code/utils.py"

# Clean up any existing numpy files (store metrics under logs)
rm -f -- \
    "${OUTPUT_DIR}/logs/success.npy" \
    "${OUTPUT_DIR}/logs/calls.npy" \
    "${OUTPUT_DIR}/logs/input_tokens.npy" \
    "${OUTPUT_DIR}/logs/output_tokens.npy"

echo "Starting sequential execution..."
echo ""

#######################################
# Run one Snakemake invocation from inside the output directory and, when
# per-step validation is enabled, run check_soln.py for that step.
# Globals:
#   OUTPUT_DIR      (read) directory the run happens in
#   BASE_DIR        (read) absolute path of the output directory
#   CONFIG_PATH     (read) absolute path handed to --configfile
#   VALIDATE_STEPS  (read) "true" enables validation after a successful run
# Arguments:
#   $1 - workflow name; also used to name the log files under logs/
#   $2 - Snakefile, relative to OUTPUT_DIR
#   $3 - name given to snakemake's --forcerun
#   $4 - step number passed to check_soln.py --step
# Outputs:
#   progress messages on stdout; snakemake output goes to
#   logs/<workflow>.log, validation output to logs/<workflow>_validation.log
# Returns:
#   0 when snakemake succeeded (whatever the validation outcome),
#   1 when the directory change or snakemake failed
#######################################
run_workflow() {
    local workflow_name=$1
    local smk_file=$2
    local target=$3
    local step_number=$4
    local log_file="logs/${workflow_name}.log"
    # Absolute path: the check below runs after the cwd has changed, so a path
    # built from a relative OUTPUT_DIR would point at the wrong place.
    local success_file="${BASE_DIR}/logs/success.npy"
    local start_time duration validation_result

    echo "========================================="
    echo "Running: $workflow_name"
    echo "Target: $target"
    echo "Time: $(date)"
    echo "========================================="

    # cd into OUTPUT_DIR and do all the work there
    if ! pushd "$OUTPUT_DIR" > /dev/null; then
        echo "❌ Failed to cd into $OUTPUT_DIR"
        return 1
    fi

    # Print the command exactly as it is executed below (inside ${OUTPUT_DIR}).
    # An earlier version additionally passed: --stats logs/<workflow>.stats
    echo "Command: snakemake -s \"$smk_file\" -j 1 --forcerun \"$target\" --rerun-incomplete --configfile \"${CONFIG_PATH}\" --latency-wait 120 --verbose > ${log_file} 2>&1"
    echo ""

    start_time=$SECONDS

    # CONFIG_PATH is absolute, so the config is found even though the cwd is
    # now the output directory.
    if ! snakemake -s "$smk_file" -j 1 --forcerun "$target" --rerun-incomplete --configfile "${CONFIG_PATH}" --latency-wait 120 --verbose > "$log_file" 2>&1; then
        duration=$((SECONDS - start_time))
        echo ""
        echo "❌ $workflow_name failed after ${duration}s"
        echo ""
        popd > /dev/null
        return 1
    fi

    duration=$((SECONDS - start_time))
    echo ""
    echo "✅ $workflow_name completed successfully in ${duration}s"
    echo ""

    # Run validation for this step if it completed successfully
    if [[ "$VALIDATE_STEPS" == "true" ]]; then
        echo "Running validation for Step $step_number..."
        if python check_soln.py --out_dir "${BASE_DIR}" --step "$step_number" >> "logs/${workflow_name}_validation.log" 2>&1; then
            echo "✅ Step $step_number validation completed"
            # Report PASS/FAIL from entry (step_number - 1) of success.npy.
            # Path and index are passed as arguments rather than spliced into
            # the Python source, so unusual characters in the path are harmless.
            if [[ -f "$success_file" ]]; then
                validation_result=$(python -c 'import sys, numpy as np; print(np.load(sys.argv[1])[int(sys.argv[2]) - 1])' "$success_file" "$step_number")
                if [[ "$validation_result" == "1" ]]; then
                    echo "✅ Step $step_number validation: PASS"
                else
                    echo "❌ Step $step_number validation: FAIL"
                fi
            fi
        else
            echo "❌ Step $step_number validation failed to run"
        fi
        echo ""
    fi

    popd > /dev/null
    return 0
}

# Execute the selected workflows in order. step_counter only numbers the
# progress messages; the step number handed to run_workflow is fixed per step.
step_counter=1

if [[ $RUN_STEP1 == true ]]; then
    printf '%s\n' "${step_counter}. Running summarize_root workflow (both rules)..."
    # Two invocations against the same Snakefile: summarize_root first,
    # insert_root_summary second.
    run_workflow "summarize_root" "summarize_root_temp.smk" "summarize_root" 1
    run_workflow "insert_root_summary" "summarize_root_temp.smk" "insert_root_summary" 1
    step_counter=$((step_counter + 1))
fi

if [[ $RUN_STEP2 == true ]]; then
    printf '%s\n' "${step_counter}. Running create_numpy workflow..."
    run_workflow "create_numpy" "create_numpy_temp.smk" "create_numpy" 2
    step_counter=$((step_counter + 1))
fi

if [[ $RUN_STEP3 == true ]]; then
    printf '%s\n' "${step_counter}. Running preprocess workflow..."
    run_workflow "preprocess" "preprocess_temp.smk" "preprocess" 3
    step_counter=$((step_counter + 1))
fi

if [[ $RUN_STEP4 == true ]]; then
    printf '%s\n' "${step_counter}. Running scores workflow..."
    run_workflow "scores" "scores_temp.smk" "scores" 4
    step_counter=$((step_counter + 1))
fi

if [[ $RUN_STEP5 == true ]]; then
    printf '%s\n' "${step_counter}. Running categorization workflow..."
    run_workflow "categorization" "categorization_temp.smk" "categorization" 5
    step_counter=$((step_counter + 1))
fi

#######################################
# Run check_soln.py over the whole output directory (no --step filter).
# Globals:   OUTPUT_DIR (read)
# Outputs:   a heading line, then whatever check_soln.py prints
# Returns:   exit status of check_soln.py
#######################################
run_final_validation() {
    echo "Running final comprehensive validation..."
    # Quoted so an output directory containing spaces stays a single argument.
    python check_soln.py --out_dir "${OUTPUT_DIR}"
}

echo ""
echo "=== Sequential Execution Complete ==="
echo "Check ${OUTPUT_DIR}/ for output files"
echo "Check ${OUTPUT_DIR}/logs/*.log files for detailed logs"
if [[ "$VALIDATE_STEPS" == "true" ]]; then
    echo "Check ${OUTPUT_DIR}/logs/*_validation.log files for validation results"
fi

# Optional: Run final comprehensive validation (only if all steps were run)
if [[ "$RUN_STEP1" == "true" && "$RUN_STEP2" == "true" && "$RUN_STEP3" == "true" && "$RUN_STEP4" == "true" && "$RUN_STEP5" == "true" ]]; then
    echo ""
    if [[ "$VALIDATE_STEPS" == "false" ]]; then
        # Validation was not requested up front, so ask; a single keypress
        # answers the prompt and anything other than y/Y skips it.
        read -p "Run final comprehensive validation? (y/n): " -n 1 -r
        echo ""
        if [[ $REPLY =~ ^[Yy]$ ]]; then
            run_final_validation
        fi
    else
        run_final_validation
    fi
else
    echo ""
    echo "Note: Final comprehensive validation skipped (not all steps were run)"
fi

# Clean up
echo ""
# Cleanup is currently disabled; uncomment the lines below to enable it.
# NOTE(review): these paths are relative to the current directory, while the
# temporary files are created under ${OUTPUT_DIR} - adjust before enabling.
# echo "Cleaning up temporary files..."
# rm -rf prompts_temp
# rm -f *_temp.smk
# rm -rf .snakemake  # Clean up Snakemake's default log directory

printf 'Done!\n\n'