#!/usr/bin/env bash
# Upload 2-parameter conditional DDPM (HI emulation, CAMELS LH params_2, epoch 200) with full training/eval/posterior toolchain
# Header text carried over from the original upload: f513198 verified
#SBATCH --account=<your-slurm-account>
#SBATCH --partition=l40s
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --gres=gpu:l40s:1
#SBATCH --time=02:00:00
#SBATCH --job-name=ddpm_hi_eval
#SBATCH --mail-user=<your-email> # replace before submitting
#SBATCH --output=slurm-eval-%j.out
#SBATCH --error=slurm-eval-%j.err
#
# Evaluate conditional diffusion checkpoint (evaluate_conditional.py).
#
# Usage (interactive):
#   bash scripts/shell/evaluate_conditional.sh outputs_conditional_2label_*/checkpoints/best_model.pt
#   CHECKPOINT=... OUTPUT_DIR=... bash scripts/shell/evaluate_conditional.sh
#
# Usage (SLURM):
#   cd .../Models/2param_DDPM_HI_Emulation && sbatch scripts/shell/evaluate_conditional.sh
#   (if CHECKPOINT is unset and no path is given, the most recently modified
#   best_model.pt found under the project directory is used)
#
# Or pick a checkpoint explicitly:
#   sbatch --export=CHECKPOINT=/abs/path/to/best_model.pt scripts/shell/evaluate_conditional.sh
#   Note: --export=NAME=value without ALL propagates only the named variables
#   (plus SLURM_*). Use --export=ALL,CHECKPOINT=... if other variables from
#   your shell (e.g. DDPM_ROOT, DATA_DIR) must reach the job.
#
# Environment variables:
#   DDPM_ROOT   Repository root; the script runs from
#               ${DDPM_ROOT}/Models/2param_DDPM_HI_Emulation. Export it or
#               replace the <DDPM_ROOT> placeholder default below.
#   CHECKPOINT  Checkpoint file. A relative path is resolved against the
#               project directory, because the script changes into it first.
#   DATA_DIR    Default: ${DDPM_ROOT}/data/LH_data/params_2
#   OUTPUT_DIR  Default: evaluation_outputs
#
# Arguments:
#   If CHECKPOINT is not set and the first argument does not start with "-",
#   that argument is taken as the checkpoint path. All remaining arguments are
#   appended to the evaluate_conditional.py command line.

set -euo pipefail

# Keep the placeholder inside a quoted string: written bare on a command line,
# "<DDPM_ROOT>" would be parsed by the shell as an input and an output redirection.
DDPM_ROOT="${DDPM_ROOT:-<DDPM_ROOT>}"
if [[ ! -d "${DDPM_ROOT}/Models/2param_DDPM_HI_Emulation" ]]; then
  echo "Project directory not found: ${DDPM_ROOT}/Models/2param_DDPM_HI_Emulation" >&2
  echo "Set DDPM_ROOT to the repository root (export it, or edit the default in this script)." >&2
  exit 1
fi
# Make the root absolute so the DATA_DIR default remains valid after the cd below.
DDPM_ROOT="$(cd -- "${DDPM_ROOT}" >/dev/null && pwd)"
cd -- "${DDPM_ROOT}/Models/2param_DDPM_HI_Emulation"

module load python/miniconda3-py3.12-usr

# An explicit CHECKPOINT from the environment wins; otherwise a leading
# non-flag argument is consumed as the checkpoint path.
CHECKPOINT="${CHECKPOINT:-}"
if [[ -z "${CHECKPOINT}" && -n "${1:-}" && "${1}" != -* ]]; then
  CHECKPOINT="$1"
  shift
fi

# Uncomment to pin a run when several outputs_* folders exist:
# CHECKPOINT="outputs_conditional_2label_20260330_235542/checkpoints/best_model.pt"

if [[ -z "${CHECKPOINT}" ]]; then
  # Nothing was supplied: fall back to the best_model.pt with the latest
  # modification time. Each find record is "<mtime> <path>"; sorting
  # numerically and keeping the last line selects the newest file.
  # "|| true" keeps a failing find (e.g. an unreadable subdirectory) from
  # aborting the script under "set -e -o pipefail".
  newest_entry="$(
    find "${PWD}" -maxdepth 8 -type f -name 'best_model.pt' -printf '%T@ %p\n' 2>/dev/null \
      | sort -n \
      | tail -n1
  )" || true
  if [[ -n "${newest_entry}" ]]; then
    # Drop the leading "<mtime> " field, leaving only the path.
    CHECKPOINT="${newest_entry#* }"
    echo "Auto-selected checkpoint (newest best_model.pt): ${CHECKPOINT}" >&2
  fi
fi

if [[ -z "${CHECKPOINT}" ]]; then
  echo "No checkpoint found. Set CHECKPOINT, pass a .pt path as arg 1, or train first." >&2
  echo "SLURM example:" >&2
  echo " sbatch --export=CHECKPOINT=${PWD}/outputs_conditional_2label_*/checkpoints/best_model.pt scripts/shell/evaluate_conditional.sh" >&2
  exit 1
fi

if [[ ! -f "${CHECKPOINT}" ]]; then
  echo "Checkpoint file not found: ${CHECKPOINT}" >&2
  exit 1
fi

DATA_DIR="${DATA_DIR:-${DDPM_ROOT}/data/LH_data/params_2}"
OUTPUT_DIR="${OUTPUT_DIR:-evaluation_outputs}"

echo "==============================================="
echo "Job ID: ${SLURM_JOB_ID:-local}"
echo "Node: ${SLURM_NODELIST:-$(hostname)}"
echo "GPU: ${CUDA_VISIBLE_DEVICES:-default}"
echo "Started: $(date)"
echo "Checkpoint: ${CHECKPOINT}"
echo "Output dir: ${OUTPUT_DIR}"
echo "==============================================="

# Caller-supplied arguments ("$@") are appended after the built-in defaults.
python evaluate_conditional.py \
  --checkpoint "${CHECKPOINT}" \
  --data_dir "${DATA_DIR}" \
  --output_dir "${OUTPUT_DIR}" \
  --split test \
  --num_samples 8 \
  --ddim_steps 50 \
  "$@"

# Only reached when the evaluation succeeded ("set -e" aborts otherwise).
echo "==============================================="
echo "Evaluation finished: $(date)"
echo "==============================================="