#!/bin/bash
#SBATCH --account=<your-slurm-account>
#SBATCH --partition=l40s
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --gres=gpu:l40s:1
#SBATCH --time=02:00:00
#SBATCH --job-name=ddpm_hi_eval
#SBATCH --mail-user=<your-email> # replace before submitting
#SBATCH --output=slurm-eval-%j.out
#SBATCH --error=slurm-eval-%j.err
# Evaluate conditional diffusion checkpoint (evaluate_conditional.py).
#
# Usage (interactive):
# bash scripts/shell/evaluate_conditional.sh outputs_conditional_2label_*/checkpoints/best_model.pt
# CHECKPOINT=... OUTPUT_DIR=... bash scripts/shell/evaluate_conditional.sh
#
# Usage (SLURM):
# cd .../Models/2param_DDPM_HI_Emulation && sbatch scripts/shell/evaluate_conditional.sh
# (if CHECKPOINT is unset, uses the most recently modified best_model.pt found up to 8 directory levels below the project directory)
#
# Or pick a checkpoint explicitly:
# sbatch --export=CHECKPOINT=/abs/path/to/best_model.pt scripts/shell/evaluate_conditional.sh
#
# Optional env vars: CHECKPOINT, DATA_DIR, OUTPUT_DIR. Extra CLI flags go after the checkpoint path; all remaining arguments are forwarded to evaluate_conditional.py.
# Strict mode: exit on a failing command, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Work from the project directory: evaluate_conditional.py and the checkpoint
# search below are resolved relative to it.
# The path is quoted so it is always a single literal argument. Unquoted, bash
# would parse the "<" and ">" of the <DDPM_ROOT> placeholder as redirections
# rather than as part of the path. Replace <DDPM_ROOT> with the absolute path of
# your checkout before running.
cd "<DDPM_ROOT>/Models/2param_DDPM_HI_Emulation"

# Load the python/miniconda3-py3.12-usr environment module.
module load python/miniconda3-py3.12-usr
# Decide which checkpoint to evaluate.
#   1. A non-empty CHECKPOINT from the environment takes precedence.
#   2. Otherwise the first positional argument is used, unless it is empty or
#      starts with "-" (an option rather than a path).
# A consumed argument is shifted away so that "$@" holds only the remaining ones.
: "${CHECKPOINT:=}"
if [[ -z "${CHECKPOINT}" && -n "${1:-}" ]]; then
  case "$1" in
    -*)
      # Looks like an option; leave the argument list untouched.
      ;;
    *)
      CHECKPOINT="$1"
      shift
      ;;
  esac
fi
# Uncomment to pin a run when several outputs_* folders exist:
# CHECKPOINT="outputs_conditional_2label_20260330_235542/checkpoints/best_model.pt"
# Fallback when neither the environment nor the arguments supplied a checkpoint:
# take the most recently modified best_model.pt found up to 8 levels below the
# current directory.
if [[ -z "${CHECKPOINT}" ]]; then
  # find prints "<mtime-in-seconds> <path>" per match; a numeric sort puts the
  # newest record last. Errors from find are discarded, and "|| true" keeps a
  # failing pipeline from aborting the script.
  newest_record="$(
    find "${PWD}" -maxdepth 8 -type f -name 'best_model.pt' -printf '%T@ %p\n' 2>/dev/null \
      | sort -n \
      | tail -n1
  )" || true
  if [[ -n "${newest_record}" ]]; then
    # Strip the timestamp (everything through the first space) to keep the path.
    CHECKPOINT="${newest_record#* }"
    printf '%s\n' "Auto-selected checkpoint (newest best_model.pt): ${CHECKPOINT}" >&2
  fi
fi
# No checkpoint could be determined: explain on stderr how to supply one, then
# exit with status 1.
if [[ -z "${CHECKPOINT}" ]]; then
  printf '%s\n' \
    "No checkpoint found. Set CHECKPOINT, pass a .pt path as arg 1, or train first." \
    "SLURM example:" \
    " sbatch --export=CHECKPOINT=${PWD}/outputs_conditional_2label_*/checkpoints/best_model.pt scripts/shell/evaluate_conditional.sh" \
    >&2
  exit 1
fi

# A checkpoint was named but is not an existing regular file: report it on
# stderr and exit with status 1.
[[ -f "${CHECKPOINT}" ]] || {
  printf '%s\n' "Checkpoint file not found: ${CHECKPOINT}" >&2
  exit 1
}
# Data and output locations. A non-empty value from the environment is kept;
# otherwise the default below is assigned. Replace <DDPM_ROOT> in the DATA_DIR
# default with the absolute path of your checkout.
: "${DATA_DIR:=<DDPM_ROOT>/data/LH_data/params_2}"
: "${OUTPUT_DIR:=evaluation_outputs}"
# Start-of-run banner on stdout: job id, node, GPU, start time, and the
# checkpoint and output directory in use. When the SLURM_* or
# CUDA_VISIBLE_DEVICES variables are unset or empty, the fallbacks are "local",
# the output of hostname, and "default".
banner_rule="==============================================="
printf '%s\n' "${banner_rule}"
printf 'Job ID: %s\n' "${SLURM_JOB_ID:-local}"
printf 'Node: %s\n' "${SLURM_NODELIST:-$(hostname)}"
printf 'GPU: %s\n' "${CUDA_VISIBLE_DEVICES:-default}"
printf 'Started: %s\n' "$(date)"
printf 'Checkpoint: %s\n' "${CHECKPOINT}"
printf 'Output dir: %s\n' "${OUTPUT_DIR}"
printf '%s\n' "${banner_rule}"
# Assemble the evaluation command as an array so every argument stays a single
# word. The fixed options are --split test, --num_samples 8 and --ddim_steps 50;
# any remaining script arguments are appended last and passed through as given.
eval_cmd=(
  python evaluate_conditional.py
  --checkpoint "${CHECKPOINT}"
  --data_dir "${DATA_DIR}"
  --output_dir "${OUTPUT_DIR}"
  --split test
  --num_samples 8
  --ddim_steps 50
  "$@"
)
"${eval_cmd[@]}"

# Closing banner with the finish time; only reached when the command above
# succeeded, because the script runs with set -e.
footer_rule="==============================================="
printf '%s\n' "${footer_rule}"
printf 'Evaluation finished: %s\n' "$(date)"
printf '%s\n' "${footer_rule}"
|