#!/bin/bash
#SBATCH --account=
#SBATCH --partition=l40s
#SBATCH --nodes=1
#SBATCH --ntasks=8
#SBATCH --gres=gpu:l40s:1
#SBATCH --time=04:00:00
#SBATCH --job-name=ddpm_hi_lh6_eval
#SBATCH --mail-user= # replace before submitting
#SBATCH --output=slurm-eval-%j.out
#SBATCH --error=slurm-eval-%j.err

# Evaluate conditional DDPM (6 CAMELS LH parameters).
#
# Submit:
#   sbatch /Models/6param_ddpm_hi_lh6/scripts/shell/evaluate_conditional_lh6.sh
#
# Optional overrides (example):
#   sbatch --export=ALL,CHECKPOINT=/path/to/best_model.pt,OUTPUT_DIR=/path/to/eval_out evaluate_conditional_lh6.sh
#
# NOTE: keep "ALL" in --export; without it only the listed variables are
# exported to the job and the rest of the submitting environment is dropped.
#
# Environment variables (all optional; defaults are set below):
#   DATA_DIR       directory passed to evaluate_conditional.py as --data_dir
#   CHECKPOINT     checkpoint path passed as --checkpoint
#   OUTPUT_DIR     directory passed as --output_dir
#   TRAINING_ARGS  if non-empty, forwarded as --training_args
#
# Exit status:
#   0 when the evaluation succeeds; 1 when the repository, checkpoint or data
#   directory is missing; otherwise the exit status of evaluate_conditional.py,
#   so a failed evaluation is not reported by Slurm as a completed job.

REPO="/Models/6param_ddpm_hi_lh6"
cd "${REPO}" || {
  echo "ERROR: cannot cd to ${REPO}" >&2
  exit 1
}

# Best effort: warn but keep going if the module cannot be loaded (for example
# on a local run without environment modules); the python found on PATH is
# used in that case.
module load python/miniconda3-py3.12-usr ||
  echo "WARNING: 'module load python/miniconda3-py3.12-usr' failed; using python from PATH" >&2

DATA_DIR="${DATA_DIR:-/data/LH_data/params_6}"
# Default: trained run kept under april_26 (large artifacts not duplicated here).
CHECKPOINT="${CHECKPOINT:-/april_26/ddpm_hi_lh6/outputs_conditional_6param_20260413_132226/checkpoints/best_model.pt}"
OUTPUT_DIR="${OUTPUT_DIR:-${REPO}/evaluation_outputs_6param}"
TRAINING_ARGS="${TRAINING_ARGS:-}"

echo "==============================================="
echo "Job ID: ${SLURM_JOB_ID:-local}"
echo "Job Name: ${SLURM_JOB_NAME:-evaluate_conditional_lh6}"
echo "Node: ${SLURM_NODELIST:-$(hostname)}"
echo "GPU: ${CUDA_VISIBLE_DEVICES:-n/a}"
echo "Starting Time: $(date)"
echo "CHECKPOINT: ${CHECKPOINT}"
echo "DATA_DIR: ${DATA_DIR}"
echo "OUTPUT_DIR: ${OUTPUT_DIR}"
echo "==============================================="

# Fail fast on missing inputs instead of discovering it inside the Python run.
if [[ ! -e "${CHECKPOINT}" ]]; then
  echo "ERROR: checkpoint not found: ${CHECKPOINT}" >&2
  exit 1
fi
if [[ ! -d "${DATA_DIR}" ]]; then
  echo "ERROR: data directory not found: ${DATA_DIR}" >&2
  exit 1
fi

# Build the command as an array so paths containing spaces stay single arguments.
EVAL_ARGS=(
  python evaluate_conditional.py
  --checkpoint "${CHECKPOINT}"
  --data_dir "${DATA_DIR}"
  --output_dir "${OUTPUT_DIR}"
  --split test
  --num_samples 8
  --ddim_steps 50
)

# --training_args is only passed when the caller supplied a value.
if [[ -n "${TRAINING_ARGS}" ]]; then
  EVAL_ARGS+=(--training_args "${TRAINING_ARGS}")
fi

# Capture the evaluator's exit status so it can be propagated to Slurm.
eval_status=0
"${EVAL_ARGS[@]}" || eval_status=$?

if ((eval_status != 0)); then
  echo "===============================================" >&2
  echo "Evaluation FAILED (exit status ${eval_status}) at: $(date)" >&2
  echo "===============================================" >&2
  exit "${eval_status}"
fi

echo "==============================================="
echo "Evaluation completed at: $(date)"
echo "Plots and evaluation_data.npz under: ${OUTPUT_DIR}"
echo "==============================================="