#!/bin/bash
#SBATCH --account=
#SBATCH --partition=l40s
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --gres=gpu:l40s:1
#SBATCH --time=02:00:00
#SBATCH --job-name=ddpm_hi_eval
#SBATCH --mail-user=   # replace before submitting
#SBATCH --output=slurm-eval-%j.out
#SBATCH --error=slurm-eval-%j.err

# Evaluate conditional diffusion checkpoint (evaluate_conditional.py).
#
# Usage (interactive):
#   bash scripts/shell/evaluate_conditional.sh outputs_conditional_2label_*/checkpoints/best_model.pt
#   CHECKPOINT=... OUTPUT_DIR=... bash scripts/shell/evaluate_conditional.sh
#
# Usage (SLURM):
#   cd .../Models/2param_DDPM_HI_Emulation && sbatch scripts/shell/evaluate_conditional.sh
#   (uses the newest best_model.pt found under this repo if CHECKPOINT is unset)
#
# Or pick a checkpoint explicitly:
#   sbatch --export=ALL,CHECKPOINT=/abs/path/to/best_model.pt scripts/shell/evaluate_conditional.sh
#   sbatch scripts/shell/evaluate_conditional.sh /abs/path/to/best_model.pt
#
# Checkpoint resolution order:
#   1. the CHECKPOINT environment variable, if non-empty;
#   2. positional argument 1, if it does not start with '-';
#   3. the most recently modified best_model.pt below the repo root.
#
# Optional env vars: DATA_DIR, OUTPUT_DIR. Extra CLI flags go after the
# checkpoint path and are forwarded verbatim to evaluate_conditional.py.
#
# Notes:
#   * The script changes into the repo root before doing anything else, so
#     relative CHECKPOINT / OUTPUT_DIR paths are resolved against that
#     directory, not against the caller's working directory.
#   * A positional checkpoint is only consumed when CHECKPOINT is unset; when
#     CHECKPOINT is set, every positional argument is forwarded to python.
#   * Use --export=ALL,VAR=... rather than --export=VAR=...: without ALL,
#     sbatch exports only the listed variables (plus SLURM_*), dropping the
#     rest of the submitting shell's environment.

set -euo pipefail

cd /Models/2param_DDPM_HI_Emulation
module load python/miniconda3-py3.12-usr

# 1) Environment variable, 2) first positional argument (unless it is a flag).
CHECKPOINT="${CHECKPOINT:-}"
if [[ -z "${CHECKPOINT}" && -n "${1:-}" && "${1}" != -* ]]; then
  CHECKPOINT="$1"
  shift
fi

# Uncomment to pin a run when several outputs_* folders exist:
# CHECKPOINT="outputs_conditional_2label_20260330_235542/checkpoints/best_model.pt"

# 3) Nothing supplied via env or argv: fall back to the newest best_model.pt
#    (by modification time) within 8 directory levels of the repo root.
if [[ -z "${CHECKPOINT}" ]]; then
  # Each find record is "<mtime-epoch> <path>"; numeric sort puts the newest
  # last. The search is best-effort: errors from find (e.g. unreadable
  # directories) are deliberately ignored so we can report "no checkpoint"
  # below instead of aborting under `set -e` / pipefail.
  _line="$(
    find "${PWD}" -maxdepth 8 -type f -name 'best_model.pt' -printf '%T@ %p\n' 2>/dev/null \
      | sort -n \
      | tail -n1
  )" || true
  if [[ -n "${_line}" ]]; then
    # Drop the leading timestamp field, keeping the path (which may itself
    # contain spaces, hence shortest-prefix removal).
    CHECKPOINT="${_line#* }"
    echo "Auto-selected checkpoint (newest best_model.pt): ${CHECKPOINT}" >&2
  fi
fi

if [[ -z "${CHECKPOINT}" ]]; then
  echo "No checkpoint found. Set CHECKPOINT, pass a .pt path as arg 1, or train first." >&2
  echo "SLURM example:" >&2
  echo "  sbatch --export=ALL,CHECKPOINT=${PWD}/outputs_conditional_2label_*/checkpoints/best_model.pt scripts/shell/evaluate_conditional.sh" >&2
  exit 1
fi

if [[ ! -f "${CHECKPOINT}" ]]; then
  echo "Checkpoint file not found: ${CHECKPOINT}" >&2
  exit 1
fi

DATA_DIR="${DATA_DIR:-/data/LH_data/params_2}"
OUTPUT_DIR="${OUTPUT_DIR:-evaluation_outputs}"

# Job banner: SLURM variables fall back to local equivalents when the script
# is run interactively.
echo "==============================================="
echo "Job ID:     ${SLURM_JOB_ID:-local}"
echo "Node:       ${SLURM_NODELIST:-$(hostname)}"
echo "GPU:        ${CUDA_VISIBLE_DEVICES:-default}"
echo "Started:    $(date)"
echo "Checkpoint: ${CHECKPOINT}"
echo "Output dir: ${OUTPUT_DIR}"
echo "==============================================="

# "$@" holds any remaining caller-supplied flags; placing them last lets the
# caller append options after the defaults listed here.
python evaluate_conditional.py \
  --checkpoint "${CHECKPOINT}" \
  --data_dir "${DATA_DIR}" \
  --output_dir "${OUTPUT_DIR}" \
  --split test \
  --num_samples 8 \
  --ddim_steps 50 \
  "$@"

echo "==============================================="
echo "Evaluation finished: $(date)"
echo "==============================================="