Xsmos committed on
Commit
1aea5e1
·
verified ·
1 Parent(s): 5ea9467

0716-2131

Browse files
Files changed (3) hide show
  1. context_unet.py +1 -1
  2. diffusion.py +82 -55
  3. quantify_results.ipynb +26 -7
context_unet.py CHANGED
@@ -330,7 +330,7 @@ class ContextUnet(nn.Module):
330
  elif image_size == 128:
331
  channel_mult = (1, 1, 2, 3, 4)
332
  elif image_size == 64:
333
- channel_mult = (1, 2, 2, 4, 4)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
334
  elif image_size == 32:
335
  channel_mult = (1, 2, 2, 4)
336
  elif image_size == 28:
 
330
  elif image_size == 128:
331
  channel_mult = (1, 1, 2, 3, 4)
332
  elif image_size == 64:
333
+ channel_mult = (1, 2, 4, 4, 4)#(1, 2, 2, 4)#(1, 2, 8, 8, 8)#(1, 2, 4)#(1, 2, 2, 4)#(0.5,1,2,2,4,4)#(1, 1, 2, 2, 4, 4)#
334
  elif image_size == 32:
335
  channel_mult = (1, 2, 2, 4)
336
  elif image_size == 28:
diffusion.py CHANGED
@@ -239,7 +239,7 @@ class TrainConfig:
239
  stride = (2,2) if dim == 2 else (2,2,4)
240
  num_image = 1000#32000#20000#15000#7000#25600#3000#10000#1000#10000#5000#2560#800#2560
241
  batch_size = 1#2#50#20#2#100 # 10
242
- n_epoch = 4# 10#50#20#20#2#5#25 # 120
243
  HII_DIM = 64
244
  num_redshift = 512#128#64#512#256#256#64#512#128
245
  channel = 1
@@ -508,7 +508,10 @@ class DDPM21CM:
508
  # for i, from_ranges in self.ranges_dict[type].items():
509
  # value[i] = (value[i] - from_ranges[0])/(from_ranges[1]-from_ranges[0]) # normalize
510
  # value[i] =
511
- def rescale(self, value, ranges, to: list):
 
 
 
512
  if value.ndim == 1:
513
  value = value.view(-1,len(value))
514
 
@@ -518,20 +521,21 @@ class DDPM21CM:
518
  value = value * (to[1]-to[0]) + to[0]
519
  return value
520
 
521
- def sample(self, params:torch.tensor=None, num_new_img=192, ema=False, entire=False, save=False):
522
  # n_sample = params.shape[0]
523
  # file = self.config.resume
524
 
 
525
  if params is None:
526
- params = torch.tensor([0.20000000000000018, 0.5055875000000001])
527
- params_backup = params.numpy().copy()
528
- else:
529
- params_backup = params.numpy().copy()
530
- params = self.rescale(params, self.ranges_dict['params'], to=[0,1])
531
-
532
- print(f"device {torch.cuda.current_device()} sampling {num_new_img} images with normalized params = {params}")
533
- params = params.repeat(num_new_img,1)
534
- assert params.dim() == 2, "params must be a 2D torch.tensor"
535
  # print("params =", params)
536
  # print("params =", params)
537
  # print("len(params) =", len(params))
@@ -557,18 +561,24 @@ class DDPM21CM:
557
  with torch.no_grad():
558
  x_last, x_entire = self.ddpm.sample(
559
  nn_model=self.nn_model,
560
- params=params.to(self.config.device),
561
  device=self.config.device,
562
  guide_w=self.config.guide_w
563
  )
564
 
565
  if save:
566
  # np.save(os.path.join(self.config.output_dir, f"{self.config.run_name}{'ema' if ema else ''}.npy"), x_last)
567
- np.save(os.path.join(self.config.output_dir, f"Tvir{params_backup[0]}-zeta{params_backup[1]}-N{self.config.num_image}{'ema' if ema else ''}.npy"), x_last)
 
 
 
 
568
  if entire:
569
- np.save(os.path.join(self.config.output_dir, f"Tvir{params_backup[0]}-zeta{params_backup[1]}-N{self.config.num_image}{'ema' if ema else ''}_entire.npy"), x_last)
570
- else:
571
- return x_last
 
 
572
  # %%
573
  def train(rank, world_size):
574
  config = TrainConfig()
@@ -576,8 +586,8 @@ def train(rank, world_size):
576
 
577
  ddp_setup(rank, world_size)
578
 
579
- num_image_list = [2000]#[200]#[1600,3200,6400,12800,25600]
580
- for i, num_image in enumerate(num_image_list):
581
  config.num_image = num_image
582
  # config.world_size = world_size
583
 
@@ -614,68 +624,85 @@ if __name__ == "__main__":
614
 
615
  # %%
616
 
617
- def generate_samples(ddpm21cm, num_new_img, max_num_img_per_gpu, rank, world_size):
618
- samples = []
619
- for _ in range(num_new_img // max_num_img_per_gpu):
620
- sample = ddpm21cm.sample(params=torch.tensor([4.4, 131.341]), num_new_img=max_num_img_per_gpu)
621
- samples.append(sample)
622
- # ddpm21cm.sample(params=torch.tensor((5.6, 19.037)), num_new_img=max_num_img_per_gpu)
623
- # ddpm21cm.sample(params=torch.tensor((4.699, 30)), num_new_img=max_num_img_per_gpu)
624
- # ddpm21cm.sample(params=torch.tensor((5.477, 200)), num_new_img=max_num_img_per_gpu)
625
- # ddpm21cm.sample(params=torch.tensor((4.8, 131.341)), num_new_img=max_num_img_per_gpu)
626
- samples = np.concatenate(samples, axis=0)
627
-
628
- samples_list = [np.empty_like(samples) for _ in range(world_size)]
629
- dist.all_gather_object(samples_list, samples)
630
-
631
- if rank == 0:
632
- all_samples = np.concatenate(samples_list, axis=0)
633
- return all_samples
634
- else:
635
- return None
636
-
637
- def sample(rank, world_size, config, num_new_img, max_num_img_per_gpu, return_dict):
 
 
 
 
 
 
638
  ddp_setup(rank, world_size)
639
  ddpm21cm = DDPM21CM(config)
640
 
641
- samples = generate_samples(ddpm21cm, num_new_img, max_num_img_per_gpu, rank, world_size)
 
 
 
 
 
 
 
 
 
642
 
643
  # print(f"device {torch.cuda.current_device()}, rank = {rank}, keys = {return_dict.keys()}, samples.shape = {np.shape(samples)}")
644
- if rank == 0:
645
- return_dict['samples'] = samples
646
  # print(f"device {torch.cuda.current_device()}, rank = {rank}, keys = {return_dict.keys()}")
647
 
648
  dist.destroy_process_group()
649
 
650
 
651
- if __name__ == False:#"__main__":
652
- print(" sampling ".center(100,'-'))
653
  world_size = torch.cuda.device_count()
654
- # num_image_list = [1600,3200,6400,12800,25600]
655
- num_image_list = [10]
656
- num_new_img = 4
657
- max_num_img_per_gpu = 2
 
 
 
658
 
659
  # print("config = TrainConfig()")
660
  config = TrainConfig()
661
  config.world_size = world_size
662
  # print("config.world_size = world_size")
663
 
664
- for num_image in num_image_list:
665
  config.num_image = num_image
666
- config.resume = f"./outputs/model_state-N{num_image}-epoch1-device0"
667
 
668
  # print("ddpm21cm = DDPM21CM(config)")
669
  manager = mp.Manager()
670
  return_dict = manager.dict()
671
 
672
- mp.spawn(sample, args=(world_size, config, num_new_img, max_num_img_per_gpu, return_dict), nprocs=world_size, join=True)
673
 
674
  # print("---"*30)
675
  # print(f"device {torch.cuda.current_device()}, keys = {return_dict.keys()}")
676
- if "samples" in return_dict:
677
- samples = return_dict["samples"]
678
- print(f"device {torch.cuda.current_device()} generated samples shape: {samples.shape}")
679
 
680
 
681
  # %%
 
239
  stride = (2,2) if dim == 2 else (2,2,4)
240
  num_image = 1000#32000#20000#15000#7000#25600#3000#10000#1000#10000#5000#2560#800#2560
241
  batch_size = 1#2#50#20#2#100 # 10
242
+ n_epoch = 8#4# 10#50#20#20#2#5#25 # 120
243
  HII_DIM = 64
244
  num_redshift = 512#128#64#512#256#256#64#512#128
245
  channel = 1
 
508
  # for i, from_ranges in self.ranges_dict[type].items():
509
  # value[i] = (value[i] - from_ranges[0])/(from_ranges[1]-from_ranges[0]) # normalize
510
  # value[i] =
511
+ def rescale(self, params, ranges, to: list):
512
+ # value = np.array(params).copy()
513
+ value = params.clone()
514
+
515
  if value.ndim == 1:
516
  value = value.view(-1,len(value))
517
 
 
521
  value = value * (to[1]-to[0]) + to[0]
522
  return value
523
 
524
+ def sample(self, params:torch.tensor=None, num_new_img_per_gpu=192, ema=False, entire=False, save=True):
525
  # n_sample = params.shape[0]
526
  # file = self.config.resume
527
 
528
+ print(f"device {torch.cuda.current_device()}, sample, params = {params}")
529
  if params is None:
530
+ params = torch.tensor([4.4, 131.341])
531
+ # params_backup = params.numpy().copy()
532
+ # else:
533
+ params_backup = params.numpy().copy()
534
+ params_normalized = self.rescale(params, self.ranges_dict['params'], to=[0,1])
535
+
536
+ print(f"device {torch.cuda.current_device()} sampling {num_new_img_per_gpu} images with normalized params = {params_normalized}")
537
+ params_normalized = params_normalized.repeat(num_new_img_per_gpu,1)
538
+ assert params_normalized.dim() == 2, "params_normalized must be a 2D torch.tensor"
539
  # print("params =", params)
540
  # print("params =", params)
541
  # print("len(params) =", len(params))
 
561
  with torch.no_grad():
562
  x_last, x_entire = self.ddpm.sample(
563
  nn_model=self.nn_model,
564
+ params=params_normalized.to(self.config.device),
565
  device=self.config.device,
566
  guide_w=self.config.guide_w
567
  )
568
 
569
  if save:
570
  # np.save(os.path.join(self.config.output_dir, f"{self.config.run_name}{'ema' if ema else ''}.npy"), x_last)
571
+ savetime = datetime.datetime.now().strftime("%m%d-%H%M")
572
+ savename = os.path.join(self.config.output_dir, f"Tvir{params_backup[0]}-zeta{params_backup[1]}-N{self.config.num_image}-device{torch.cuda.current_device()}-{savetime}{'ema' if ema else ''}.npy")
573
+ print(f"saving {savename} ...")
574
+ np.save(savename, x_last)
575
+
576
  if entire:
577
+ savename = os.path.join(self.config.output_dir, f"Tvir{params_backup[0]}-zeta{params_backup[1]}-N{self.config.num_image}-device{torch.cuda.current_device()}-{savetime}{'ema' if ema else ''}_entire.npy")
578
+ print(f"saving {savename} ...")
579
+ np.save(savename, x_entire)
580
+ # else:
581
+ return x_last
582
  # %%
583
  def train(rank, world_size):
584
  config = TrainConfig()
 
586
 
587
  ddp_setup(rank, world_size)
588
 
589
+ num_train_image_list = [10]#[200]#[1600,3200,6400,12800,25600]
590
+ for i, num_image in enumerate(num_train_image_list):
591
  config.num_image = num_image
592
  # config.world_size = world_size
593
 
 
624
 
625
  # %%
626
 
627
+ # def generate_samples(ddpm21cm, num_new_img_per_gpu, max_num_img_per_gpu, rank, world_size, params):
628
+ # # samples = []
629
+ # for _ in range(num_new_img_per_gpu // max_num_img_per_gpu):
630
+ # sample = ddpm21cm.sample(
631
+ # params=params,
632
+ # num_new_img_per_gpu=max_num_img_per_gpu
633
+ # )
634
+
635
+ # print(f"device {torch.cuda.current_device()} generated sample of shape: {sample.shape}")
636
+
637
+ # # samples.append(sample)
638
+ # # ddpm21cm.sample(params=torch.tensor((5.6, 19.037)), num_new_img_per_gpu=max_num_img_per_gpu)
639
+ # # ddpm21cm.sample(params=torch.tensor((4.699, 30)), num_new_img_per_gpu=max_num_img_per_gpu)
640
+ # # ddpm21cm.sample(params=torch.tensor((5.477, 200)), num_new_img_per_gpu=max_num_img_per_gpu)
641
+ # # ddpm21cm.sample(params=torch.tensor((4.8, 131.341)), num_new_img_per_gpu=max_num_img_per_gpu)
642
+ # # samples = np.concatenate(samples, axis=0)
643
+
644
+ # # samples_list = [np.empty_like(samples) for _ in range(world_size)]
645
+ # # dist.all_gather_object(samples_list, samples)
646
+
647
+ # # if rank == 0:
648
+ # # all_samples = np.concatenate(samples_list, axis=0)
649
+ # # return all_samples
650
+ # # else:
651
+ # # return None
652
+
653
+ def generate_samples(rank, world_size, config, num_new_img_per_gpu, max_num_img_per_gpu, return_dict, params):
654
  ddp_setup(rank, world_size)
655
  ddpm21cm = DDPM21CM(config)
656
 
657
+ # generate_samples(ddpm21cm, num_new_img_per_gpu, max_num_img_per_gpu, rank, world_size, params)
658
+
659
+ # samples = []
660
+ for _ in range(num_new_img_per_gpu // max_num_img_per_gpu):
661
+ sample = ddpm21cm.sample(
662
+ params=params,
663
+ num_new_img_per_gpu=max_num_img_per_gpu
664
+ )
665
+
666
+ print(f"device {torch.cuda.current_device()} generated sample of shape: {sample.shape}")
667
 
668
  # print(f"device {torch.cuda.current_device()}, rank = {rank}, keys = {return_dict.keys()}, samples.shape = {np.shape(samples)}")
669
+ # if rank == 0:
670
+ # return_dict['samples'] = samples
671
  # print(f"device {torch.cuda.current_device()}, rank = {rank}, keys = {return_dict.keys()}")
672
 
673
  dist.destroy_process_group()
674
 
675
 
676
+ if __name__ == "__main__":
 
677
  world_size = torch.cuda.device_count()
678
+ print(f" sampling, world_size = {world_size} ".center(100,'-'))
679
+ # num_train_image_list = [1600,3200,6400,12800,25600]
680
+ num_train_image_list = [2000]
681
+ num_new_img_per_gpu = 8
682
+ max_num_img_per_gpu = 1
683
+
684
+ params = torch.tensor([4.4, 131.341])
685
 
686
  # print("config = TrainConfig()")
687
  config = TrainConfig()
688
  config.world_size = world_size
689
  # print("config.world_size = world_size")
690
 
691
+ for num_image in num_train_image_list:
692
  config.num_image = num_image
693
+ config.resume = f"./outputs/model_state-N{num_image}-epoch3-device0"
694
 
695
  # print("ddpm21cm = DDPM21CM(config)")
696
  manager = mp.Manager()
697
  return_dict = manager.dict()
698
 
699
+ mp.spawn(generate_samples, args=(world_size, config, num_new_img_per_gpu, max_num_img_per_gpu, return_dict, params), nprocs=world_size, join=True)
700
 
701
  # print("---"*30)
702
  # print(f"device {torch.cuda.current_device()}, keys = {return_dict.keys()}")
703
+ # if "samples" in return_dict:
704
+ # samples = return_dict["samples"]
705
+ # print(f"device {torch.cuda.current_device()} generated samples shape: {samples.shape}")
706
 
707
 
708
  # %%
quantify_results.ipynb CHANGED
@@ -1971,24 +1971,43 @@
1971
  },
1972
  {
1973
  "cell_type": "code",
1974
- "execution_count": null,
1975
  "metadata": {},
1976
- "outputs": [],
1977
- "source": []
 
 
 
 
 
 
 
 
 
 
 
 
1978
  },
1979
  {
1980
  "cell_type": "code",
1981
- "execution_count": null,
1982
  "metadata": {},
1983
  "outputs": [],
1984
- "source": []
 
 
1985
  },
1986
  {
1987
  "cell_type": "code",
1988
- "execution_count": null,
1989
  "metadata": {},
1990
  "outputs": [],
1991
- "source": []
 
 
 
 
 
1992
  },
1993
  {
1994
  "cell_type": "code",
 
1971
  },
1972
  {
1973
  "cell_type": "code",
1974
+ "execution_count": 6,
1975
  "metadata": {},
1976
+ "outputs": [
1977
+ {
1978
+ "name": "stdout",
1979
+ "output_type": "stream",
1980
+ "text": [
1981
+ "(1, 1, 64, 64, 512)\n"
1982
+ ]
1983
+ }
1984
+ ],
1985
+ "source": [
1986
+ "import numpy as np\n",
1987
+ "data = np.load('/storage/home/hcoda1/3/bxia34/p-jw254-0/ml21cm/outputs/Tvir4.400000095367432-zeta131.34100341796875-N2000-device0-0716-1726.npy')\n",
1988
+ "print(data.shape)"
1989
+ ]
1990
  },
1991
  {
1992
  "cell_type": "code",
1993
+ "execution_count": 7,
1994
  "metadata": {},
1995
  "outputs": [],
1996
+ "source": [
1997
+ "Tb = data[0,0]"
1998
+ ]
1999
  },
2000
  {
2001
  "cell_type": "code",
2002
+ "execution_count": 8,
2003
  "metadata": {},
2004
  "outputs": [],
2005
+ "source": [
2006
+ "import matplotlib.pyplot as plt\n",
2007
+ "for i in range(Tb.shape[-1]):\n",
2008
+ " plt.imshow(Tb[:,:,i])\n",
2009
+ " plt.savefig(f\"Tb{i:03d}.png\")"
2010
+ ]
2011
  },
2012
  {
2013
  "cell_type": "code",