16094741
Browse files
- diffusion.py +2 -10
- perlmutter_diffusion.sbatch +2 -2
- quantify_results.ipynb +0 -0
diffusion.py
CHANGED
|
@@ -429,10 +429,6 @@ class DDPM21CM:
|
|
| 429 |
self.ema_model = copy.deepcopy(self.nn_model).eval().requires_grad_(False)
|
| 430 |
|
| 431 |
self.optimizer = torch.optim.AdamW(self.nn_model.parameters(), lr=config.lrate)
|
| 432 |
-
#self.lr_scheduler = get_cosine_schedule_with_warmup(
|
| 433 |
-
# optimizer=self.optimizer,
|
| 434 |
-
# num_training_steps=int(config.num_image / config.batch_size * config.n_epoch / config.gradient_accumulation_steps),
|
| 435 |
-
#)
|
| 436 |
self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
|
| 437 |
optimizer = self.optimizer,
|
| 438 |
T_max = int(config.num_image / config.batch_size * config.n_epoch / config.gradient_accumulation_steps),
|
|
@@ -569,16 +565,12 @@ class DDPM21CM:
|
|
| 569 |
else:
|
| 570 |
c = c.to(self.config.device)
|
| 571 |
noise_pred = self.nn_model(xt, ts, c).to(x.dtype)
|
| 572 |
-
|
| 573 |
-
# print("noise_pred = self.nn_model(xt, ts, c), noise_pred.dtype =", noise_pred.dtype, noise.dtype)
|
| 574 |
|
| 575 |
loss = F.mse_loss(noise, noise_pred)
|
| 576 |
loss = loss / self.config.gradient_accumulation_steps
|
| 577 |
loss.backward()
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
#self.accelerator.clip_grad_norm_(self.nn_model.parameters(), 1)
|
| 581 |
-
if (i+i) % self.config.gradient_accumulation_steps == 0:
|
| 582 |
torch.nn.utils.clip_grad_norm_(self.nn_model.parameters(), max_norm=1.0)
|
| 583 |
self.optimizer.step()
|
| 584 |
self.lr_scheduler.step()
|
|
|
|
| 429 |
self.ema_model = copy.deepcopy(self.nn_model).eval().requires_grad_(False)
|
| 430 |
|
| 431 |
self.optimizer = torch.optim.AdamW(self.nn_model.parameters(), lr=config.lrate)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 432 |
self.lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
|
| 433 |
optimizer = self.optimizer,
|
| 434 |
T_max = int(config.num_image / config.batch_size * config.n_epoch / config.gradient_accumulation_steps),
|
|
|
|
| 565 |
else:
|
| 566 |
c = c.to(self.config.device)
|
| 567 |
noise_pred = self.nn_model(xt, ts, c).to(x.dtype)
|
|
|
|
|
|
|
| 568 |
|
| 569 |
loss = F.mse_loss(noise, noise_pred)
|
| 570 |
loss = loss / self.config.gradient_accumulation_steps
|
| 571 |
loss.backward()
|
| 572 |
+
|
| 573 |
+
if (i+1) % self.config.gradient_accumulation_steps == 0:
|
|
|
|
|
|
|
| 574 |
torch.nn.utils.clip_grad_norm_(self.nn_model.parameters(), max_norm=1.0)
|
| 575 |
self.optimizer.step()
|
| 576 |
self.lr_scheduler.step()
|
perlmutter_diffusion.sbatch
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
#SBATCH -q shared #regular
|
| 6 |
#SBATCH -N1
|
| 7 |
#SBATCH --gpus-per-node=1
|
| 8 |
-
#SBATCH -t 0:
|
| 9 |
#SBATCH --ntasks-per-node=1
|
| 10 |
#SBATCH -oReport-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL
|
|
@@ -42,6 +42,6 @@ srun python diffusion.py \
|
|
| 42 |
--gradient_accumulation_steps 10 \
|
| 43 |
--num_new_img_per_gpu 800 \
|
| 44 |
--max_num_img_per_gpu 80 \
|
| 45 |
-
#--resume outputs/model-N3200-
|
| 46 |
|
| 47 |
date
|
|
|
|
| 5 |
#SBATCH -q shared #regular
|
| 6 |
#SBATCH -N1
|
| 7 |
#SBATCH --gpus-per-node=1
|
| 8 |
+
#SBATCH -t 0:30:00
|
| 9 |
#SBATCH --ntasks-per-node=1
|
| 10 |
#SBATCH -oReport-%j
|
| 11 |
#SBATCH --mail-type=BEGIN,END,FAIL
|
|
|
|
| 42 |
--gradient_accumulation_steps 10 \
|
| 43 |
--num_new_img_per_gpu 800 \
|
| 44 |
--max_num_img_per_gpu 80 \
|
| 45 |
+
#--resume outputs/model-N3200-device_count1-node1-epoch99-07213338 \
|
| 46 |
|
| 47 |
date
|
quantify_results.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|