|
|
#!/bin/bash |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
date  # job start timestamp

# Load the site-provided PyTorch environment module.
module load pytorch

# Environment diagnostics, recorded in the job log for reproducibility:
# which interpreter is active, which conda envs exist, which modules loaded.
command -v python  # portable replacement for `which python`

conda env list

module list
|
|
|
|
|
# torch.distributed rendezvous address: the first node of this allocation.
# Quote $SLURM_JOB_NODELIST — node-list expressions can contain brackets/commas
# that must not be glob-expanded or word-split (ShellCheck SC2086).
MASTER_ADDR=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)

# Random rendezvous port in [10000, 19999] to avoid collisions between
# concurrent jobs on the same node.
MASTER_PORT=$((10000 + RANDOM % 10000))

# Export both for the worker processes; no need to re-assign when exporting.
export MASTER_ADDR MASTER_PORT

# Allocator option to reduce CUDA memory fragmentation.
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Echo this script into the job log for provenance ("$0" quoted in case the
# script path contains spaces; -- guards against a leading dash).
cat -- "$0"
|
|
|
|
|
# Launch distributed training across the allocation (one task per GPU slot,
# as configured by the job's sbatch directives).
# NOTE: the original had a trailing backslash after the last --train argument,
# which made the command continue onto the following (blank) line — it worked
# only by accident and would break if a line were appended. Removed.
srun python diffusion_test.py \
  --num_image 16 \
  --batch_size 2 \
  --n_epoch 40 \
  --channel_mult 1 1 2 2 4 4 \
  --num_new_img_per_gpu 4 \
  --max_num_img_per_gpu 2 \
  --gradient_accumulation_steps 1 \
  --autocast 1 \
  --use_checkpoint 1 \
  --dropout 0.1 \
  --lrate 7e-5 \
  --train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5"

date  # job end timestamp
|
|
|