#!/bin/bash
#SBATCH --account=
#SBATCH --partition=l40s
#SBATCH --nodes=1
#SBATCH --ntasks=4
#SBATCH --gres=gpu:l40s:1
#SBATCH --time=08:00:00
#SBATCH --job-name=ddim2param
# replace before submitting
#SBATCH --mail-user=
#SBATCH --output=slurm-ddim-invest-%j.out
#SBATCH --error=slurm-ddim-invest-%j.err

# Slurm batch job that runs ddim_investigation_2param.py against a saved
# checkpoint.
#
# Usage:
#   sbatch <this-script> [extra arguments forwarded verbatim to the Python script]
#
# NOTE: --account= and --mail-user= above are blank placeholders.
#
# Every setting below can be overridden through an environment variable of the
# same name; the value shown in the assignment is used when the variable is
# unset or empty:
#   CHECKPOINT, OUTPUT_DIR, STEPS (whitespace-separated list), NUM_SAMPLES,
#   IMAGE_SIZE, DEVICE, RUN_ID, CAMELS_DATA, PK_COMPARE_SAMPLES
#
# Exits with status 1 if the checkpoint file is missing or STEPS holds no
# values; otherwise `set -e` propagates the first failing command's status.

set -euo pipefail

ROOT="/Models/2param_DDPM_HI_Emulation"
# The Python script is invoked by relative name, so run from the project root.
cd "$ROOT"

module load python/miniconda3-py3.12-usr

CHECKPOINT="${CHECKPOINT:-/Models/notebook_model_weights/2param_epoch200/checkpoint_epoch_200.pt}"
OUTPUT_DIR="${OUTPUT_DIR:-/Models/ddim_investigation_2param_out}"
STEPS="${STEPS:-1500 1000 500 250 100 50}"
NUM_SAMPLES="${NUM_SAMPLES:-100}"
IMAGE_SIZE="${IMAGE_SIZE:-64}"
DEVICE="${DEVICE:-cuda}"
RUN_ID="${RUN_ID:-post-fix}"
CAMELS_DATA="${CAMELS_DATA:-/data/LH_data/params_2/train_LH.npy}"
PK_COMPARE_SAMPLES="${PK_COMPARE_SAMPLES:-100}"

# Fail fast, before any GPU work starts, if the checkpoint is missing.
if [[ ! -f "${CHECKPOINT}" ]]; then
  echo "Checkpoint file not found: ${CHECKPOINT}" >&2
  exit 1
fi

# Split STEPS into one array element per value so each becomes its own
# argument to --steps. Unlike an unquoted ${STEPS}, this splits on whitespace
# only and never applies pathname expansion. With -d '' the whole value
# (including embedded newlines) is read; read then returns non-zero on reaching
# end of input even though the array is filled, hence the intentional `|| true`.
step_list=()
read -r -d '' -a step_list <<<"${STEPS}" || true
if ((${#step_list[@]} == 0)); then
  echo "STEPS must contain at least one value" >&2
  exit 1
fi

echo "==============================================="
echo "Job ID: ${SLURM_JOB_ID:-local}"
echo "Node: ${SLURM_NODELIST:-$(hostname)}"
echo "GPU: ${CUDA_VISIBLE_DEVICES:-default}"
echo "Started: $(date)"
echo "Checkpoint: ${CHECKPOINT}"
echo "Output dir: ${OUTPUT_DIR}"
echo "Steps: ${STEPS}"
echo "Num samples: ${NUM_SAMPLES}"
echo "CAMELS data: ${CAMELS_DATA}"
echo "PK compare samples: ${PK_COMPARE_SAMPLES}"
echo "==============================================="

# "$@" comes last so arguments given on the sbatch command line follow the
# options set here.
python ddim_investigation_2param.py \
  --checkpoint "${CHECKPOINT}" \
  --output_dir "${OUTPUT_DIR}" \
  --steps "${step_list[@]}" \
  --num_samples "${NUM_SAMPLES}" \
  --image_size "${IMAGE_SIZE}" \
  --device "${DEVICE}" \
  --camels_data "${CAMELS_DATA}" \
  --pk_compare_samples "${PK_COMPARE_SAMPLES}" \
  --run_id "${RUN_ID}" \
  "$@"

echo "==============================================="
echo "Finished: $(date)"
echo "Artifacts: ${OUTPUT_DIR}"
echo "==============================================="