Xsmos commited on
Commit
eca7670
·
verified ·
1 Parent(s): 4898b88
Files changed (3) hide show
  1. context_unet.py +1 -1
  2. diffusion.py +5 -3
  3. quantify_results.ipynb +0 -0
context_unet.py CHANGED
@@ -330,7 +330,7 @@ class ContextUnet(nn.Module):
330
  elif image_size == 128:
331
  channel_mult = (1, 1, 2, 3, 4)
332
  elif image_size == 64:
333
- channel_mult = (1, 2, 4, 6, 8)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
334
  elif image_size == 32:
335
  channel_mult = (1, 2, 2, 4)
336
  elif image_size == 28:
 
330
  elif image_size == 128:
331
  channel_mult = (1, 1, 2, 3, 4)
332
  elif image_size == 64:
333
+ channel_mult = (1, 2, 3, 4)#(1, 2, 4, 6, 8)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
334
  elif image_size == 32:
335
  channel_mult = (1, 2, 2, 4)
336
  elif image_size == 28:
diffusion.py CHANGED
@@ -23,6 +23,8 @@
23
  # - it takes 62 mins to generate 8 images with shape of (64,64,64), which is even slower than simulation, which takes ~5 mins for each image. Besides, the batch_size during training and num of images to be generated are limited to be 2 and 8, respectively.
24
  # - the slowness can be solved by using multi-GPUs, and the limited-num-of-images can be solved by multi-accuracy, multi-GPUs.
25
  # - In addition, the performance of DDPM can look better compared to computation-intensive simulations.
 
 
26
 
27
  # %%
28
  from dataclasses import dataclass
@@ -581,7 +583,7 @@ class DDPM21CM:
581
  return x_last
582
  # %%
583
 
584
- num_train_image_list = [4000]
585
 
586
  def train(rank, world_size):
587
  config = TrainConfig()
@@ -602,7 +604,7 @@ def train(rank, world_size):
602
 
603
 
604
  if __name__ == "__main__":
605
- world_size = torch.cuda.device_count()
606
  print(f" training, world_size = {world_size} ".center(100,'-'))
607
  # torch.multiprocessing.set_start_method("spawn")
608
  # args = (config, nn_model, ddpm, optimizer, dataloader, lr_scheduler)
@@ -667,7 +669,7 @@ if __name__ == "__main__":
667
  print(f" sampling, world_size = {world_size} ".center(100,'-'))
668
  # num_train_image_list = [1600,3200,6400,12800,25600]
669
  # num_train_image_list = [5000]
670
- num_new_img_per_gpu = 40
671
  max_num_img_per_gpu = 20
672
 
673
  params = torch.tensor([4.4, 131.341])
 
23
  # - it takes 62 mins to generate 8 images with shape of (64,64,64), which is even slower than simulation, which takes ~5 mins for each image. Besides, the batch_size during training and num of images to be generated are limited to be 2 and 8, respectively.
24
  # - the slowness can be solved by using multi-GPUs, and the limited-num-of-images can be solved by multi-accuracy, multi-GPUs.
25
  # - In addition, the performance of DDPM can look better compared to computation-intensive simulations.
26
+ # 1 GPU, batch_size = 10, num_image = 3200, 50s for each epoch
27
+ # 4 GPU, batch_size = 10, num_image = 3200,
28
 
29
  # %%
30
  from dataclasses import dataclass
 
583
  return x_last
584
  # %%
585
 
586
+ num_train_image_list = [3200]
587
 
588
  def train(rank, world_size):
589
  config = TrainConfig()
 
604
 
605
 
606
  if __name__ == "__main__":
607
+ world_size = 1#torch.cuda.device_count()
608
  print(f" training, world_size = {world_size} ".center(100,'-'))
609
  # torch.multiprocessing.set_start_method("spawn")
610
  # args = (config, nn_model, ddpm, optimizer, dataloader, lr_scheduler)
 
669
  print(f" sampling, world_size = {world_size} ".center(100,'-'))
670
  # num_train_image_list = [1600,3200,6400,12800,25600]
671
  # num_train_image_list = [5000]
672
+ num_new_img_per_gpu = 400
673
  max_num_img_per_gpu = 20
674
 
675
  params = torch.tensor([4.4, 131.341])
quantify_results.ipynb CHANGED
The diff for this file is too large to render. See raw diff