Xsmos committed on
Commit
f894fa6
·
verified ·
1 Parent(s): 2cf3f12
Files changed (2) hide show
  1. diffusion.py +1 -1
  2. perlmutter_diffusion.sbatch +3 -3
diffusion.py CHANGED
@@ -278,7 +278,7 @@ class TrainConfig:
278
  # seed = 0
279
  # save_dir = './outputs/'
280
 
281
- save_period = 5 #np.infty #n_epoch // 2 #np.infty#.1 # the period of sampling
282
  # general parameters for the name and logger
283
  # device = "cuda" if torch.cuda.is_available() else "cpu"
284
  lrate = 1e-4
 
278
  # seed = 0
279
  # save_dir = './outputs/'
280
 
281
+ save_period = 10 #np.infty #n_epoch // 2 #np.infty#.1 # the period of sampling
282
  # general parameters for the name and logger
283
  # device = "cuda" if torch.cuda.is_available() else "cpu"
284
  lrate = 1e-4
perlmutter_diffusion.sbatch CHANGED
@@ -5,7 +5,7 @@
5
  #SBATCH -q regular #shared
6
  #SBATCH -N1
7
  #SBATCH --gpus-per-node=4
8
- #SBATCH -t 16:00
9
  #SBATCH --ntasks-per-node=1
10
  #SBATCH -oReport-%j
11
  #SBATCH --mail-type=BEGIN,END,FAIL
@@ -39,7 +39,7 @@ srun python diffusion.py \
39
  --use_checkpoint 1 \
40
  --dropout 0.2 \
41
  --lrate 2e-5 \
42
- --train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5" \
43
- #--resume ./outputs/model-N1600-device_count4-node4-epoch19-32257242 \
44
 
45
  date
 
5
  #SBATCH -q regular #shared
6
  #SBATCH -N1
7
  #SBATCH --gpus-per-node=4
8
+ #SBATCH -t 3:00:00
9
  #SBATCH --ntasks-per-node=1
10
  #SBATCH -oReport-%j
11
  #SBATCH --mail-type=BEGIN,END,FAIL
 
39
  --use_checkpoint 1 \
40
  --dropout 0.2 \
41
  --lrate 2e-5 \
42
+ --resume ./outputs/model-N1600-device_count4-node4-epoch14-32353762 \
43
+ #--train "$SCRATCH/LEN128-DIM64-CUB16-Tvir[4, 6]-zeta[10, 250]-0809-123640.h5" \
44
 
45
  date