#!/bin/bash
# Slurm batch script: conditional diffusion training for the 2-parameter
# conditional DDPM (HI emulation, CAMELS LH params_2, 200 epochs).
#
# Usage:
#   export DDPM_ROOT=/path/to/checkout   # directory containing Models/ and data/
#   sbatch <this-script>
#
# Before submitting, replace the <...> placeholders in the #SBATCH lines below;
# those lines are read by sbatch itself, so they cannot use shell variables.
#SBATCH --account=<your-slurm-account>
#SBATCH --partition=l40s
#SBATCH --nodes=1
#SBATCH --ntasks=8
#SBATCH --gres=gpu:l40s:1
#SBATCH --time=48:00:00
#SBATCH --job-name=ddpm_hi_april26
#SBATCH --mail-user=<your-email> # replace before submitting
#SBATCH --output=slurm-%j.out
#SBATCH --error=slurm-%j.err
# NOTE(review): no --mail-type directive is given; per the sbatch docs mail is
# only sent for selected event types, so --mail-user alone presumably sends
# nothing. Add e.g. --mail-type=END,FAIL if notifications are wanted.

# Abort on the first failing command so a failed cd / module load / training
# run is not followed by a misleading "Training completed" banner.
# (-u is deliberately not enabled: the SLURM_* / CUDA_* variables echoed below
# may be unset outside a job, and `module` is site-provided shell code.)
set -eo pipefail

# The project root must come from the environment. A literal <DDPM_ROOT>
# placeholder in a command line would be parsed by bash as redirections
# (`< DDPM_ROOT` and `> /...`) instead of producing a clear error.
: "${DDPM_ROOT:?set DDPM_ROOT to the project root before submitting}"

# Project root (this repo)
cd "${DDPM_ROOT}/Models/2param_DDPM_HI_Emulation"

module load python/miniconda3-py3.12-usr

echo "==============================================="
echo "Job ID: $SLURM_JOB_ID"
echo "Job Name: $SLURM_JOB_NAME"
echo "Node: $SLURM_NODELIST"
echo "GPU: $CUDA_VISIBLE_DEVICES"
echo "Starting Time: $(date)"
echo "Conditional diffusion training (DDPM_HI_Emulation_improved)"
echo "==============================================="

# Training arguments are kept in an array so each option stays on its own line
# without fragile backslash continuations.
train_args=(
  --label_dim 2
  --timesteps 1500
  --use_ddim
  --ddim_steps 50
  --normalize_labels
  --batch_size 8
  --epochs 200
  --lr 2e-4
  --early_stop_patience 100
  --sample_every 100
  --base_channels 64
  --channel_multipliers 1 2 4 8
  --attention_levels 2 3
  --data_dir "${DDPM_ROOT}/data/LH_data/params_2"
  --output_dir outputs_conditional_2label
)

python train_conditional.py "${train_args[@]}"

# To resume (e.g. epoch 100 → 150): use scripts/shell/resume_conditional_epoch100_50more.sh
# or add --epochs <new_total> --resume .../checkpoint_epoch_N.pt --resume_refresh_scheduler

# Only reached when training exited successfully (see `set -e` above).
echo "==============================================="
echo "Training completed at: $(date)"
echo "==============================================="