Xsmos
/

ml21cm

TensorBoard

generate 21cm lightcones

denoising diffusion probabilistic model

Model card Files Files and versions

xet

Metrics Training metrics Community

Xsmos committed on May 23, 2024

Commit

245c27a

verified ·

1 Parent(s): d69a5d7

0523-1704

Browse files

Files changed (1) hide show

diffusion.ipynb +43 -279

diffusion.ipynb CHANGED Viewed

@@ -244,13 +244,13 @@
     "    # dim = 2\n",
     "    dim = 2\n",
     "    stride = (2,2) if dim == 2 else (2,2,4)\n",
-    "    num_image = 240#0\n",
     "    HII_DIM = 64\n",
     "    num_redshift = 512#256#256#64#512#128\n",
     "    channel = 1\n",
     "    img_shape = (channel, HII_DIM, num_redshift) if dim == 2 else (channel, HII_DIM, HII_DIM, num_redshift)\n",
     "\n",
-    "    n_epoch = 10#2#5#25 # 120\n",
     "    num_timesteps = 1000#1000 # 1000, 500; DDPM time steps\n",
     "    batch_size = 10#20#2#100 # 10\n",
     "    # n_sample = 24 # 64, the number of samples in sampling process\n",
@@ -268,17 +268,17 @@
     "    # device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "    lrate = 1e-4\n",
     "    lr_warmup_steps = 0#5#00\n",
-    "    save_model = True\n",
     "    # save_freq = 1 #10 # the period of saving model\n",
     "    # cond = True # if training using the conditional information\n",
     "    # lr_decay = False #True# if using the learning rate decay\n",
-    "    resume = 'model_state.pth' # if resume from the trained checkpoints\n",
     "    # params_single = torch.tensor([0.2,0.80000023])\n",
     "    # params = torch.tile(params_single,(n_sample,1)).to(device)\n",
     "    # params =  params\n",
     "    # data_dir = './data' # data directory\n",
     "\n",
-    "    output_dir = \"./outputs/\"\n",
     "\n",
     "    mixed_precision = \"fp16\"\n",
     "    gradient_accumulation_steps = 1\n",
@@ -313,8 +313,9 @@
     "        # initialize the unet\n",
     "        self.nn_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM, dim=config.dim, stride=config.stride)\n",
     "\n",
-    "        if config.resume:\n",
-    "            self.nn_model.load_state_dict(torch.load(os.path.join(config.output_dir, f\"{config.resume}\"))['unet_state_dict'])\n",
     "            print(f\"resumed nn_model from {config.resume}\")\n",
     "        # nn_model = ContextUnet(n_param=1, image_size=28)\n",
     "        self.nn_model.train()\n",
@@ -327,12 +328,12 @@
     "        # whether to use ema\n",
     "        if config.ema:\n",
     "            self.ema = EMA(config.ema_rate)\n",
-    "            if config.resume:\n",
     "                self.ema_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM, dim=config.dim, stride=config.stride).to(config.device)\n",
-    "                self.ema_model.load_state_dict(torch.load(os.path.join(config.output_dir, f\"{config.resume}\"))['ema_unet_state_dict'])\n",
     "                print(f\"resumed ema_model from {config.resume}\")\n",
     "            else:\n",
-    "                self.ema_model = copy.deepcopy(nn_model).eval().requires_grad_(False)\n",
     "\n",
     "        self.optimizer = torch.optim.AdamW(self.nn_model.parameters(), lr=config.lrate)\n",
     "        self.lr_scheduler = get_cosine_schedule_with_warmup(\n",
@@ -439,14 +440,14 @@
     "                            commit_message = f\"{self.config.run_name}\",\n",
     "                            ignore_patterns = [\"step_*\", \"epoch_*\", \"*.npy\", \"__pycache__\"],\n",
     "                            )\n",
-    "                    if self.config.save_model:\n",
     "                        model_state = {\n",
     "                            'epoch': ep,\n",
     "                            'unet_state_dict': self.nn_model.state_dict(),\n",
     "                            'ema_unet_state_dict': self.ema_model.state_dict(),\n",
     "                            }\n",
-    "                        torch.save(model_state, self.config.output_dir + f\"model_state_{ep:02d}.pth\")\n",
-    "                        print('saved model at ' + self.config.output_dir + f\"model_state_{ep:02d}.pth\")\n",
     "                        # print('saved model at ' + config.save_dir + f\"model_epoch_{ep}_test_{config.run_name}.pth\")\n",
     "\n",
     "    def sample(self, file, params:torch.tensor=None, ema=False, entire=False):\n",
@@ -498,7 +499,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6dca1df1da3148f28c71fed756c7abc9",
        "version_major": 2,
        "version_minor": 0
       },
@@ -508,196 +509,31 @@
      },
      "metadata": {},
      "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "resumed nn_model from model_state.pth\n",
-      "Number of parameters for nn_model: 111048705\n",
-      "resumed ema_model from model_state.pth\n",
-      "run_name = 0523-1621\n",
-      "Launching training on one GPU.\n",
-      "dataset content: <KeysViewHDF5 ['brightness_temp', 'density', 'kwargs', 'params', 'redshifts_distances', 'seeds', 'xH_box']>\n",
-      "51200 images can be loaded\n",
-      "field.shape = (64, 64, 514)\n",
-      "params keys = [b'ION_Tvir_MIN', b'HII_EFF_FACTOR']\n",
-      "loading 240 images randomly\n",
-      "images loaded: (240, 1, 64, 512)\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Detected kernel version 3.10.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "params loaded: (240, 2)\n",
-      "images rescaled to [-1.0, 1.1240839958190918]\n",
-      "params rescaled to [0.0, 0.9972546078293054]\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "15d75d83ca9f4f49be17a89f6ddd58e1",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "66959c994f6b40649ab527212de8d3c2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "564f6d85e359481f973a49f75b180440",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "079a2325ab83494282c83b76ffb8e52e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "fefa0f8dbfeb474d90e0aaf55f8ca5e8",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b216c0bb3bd4457f9230b32b8d2ede1f",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "78d4bdad3dc34ba18f3074802c67bf61",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e78d2d3247b442b78f06b38b65944887",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5e1d909d5f3f4c26a11bd40978c57f4e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d1f56418378049b59ba1f9de7c5676f1",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "saved model at ./outputs/model_state_09.pth\n",
-      "resumed nn_model from model_state.pth\n",
       "Number of parameters for nn_model: 111048705\n",
-      "resumed ema_model from model_state.pth\n",
-      "run_name = 0523-1624\n",
       "Launching training on one GPU.\n",
       "dataset content: <KeysViewHDF5 ['brightness_temp', 'density', 'kwargs', 'params', 'redshifts_distances', 'seeds', 'xH_box']>\n",
       "51200 images can be loaded\n",
       "field.shape = (64, 64, 514)\n",
       "params keys = [b'ION_Tvir_MIN', b'HII_EFF_FACTOR']\n",
-      "loading 240 images randomly\n",
-      "images loaded: (240, 1, 64, 512)\n"
      ]
     },
     {
@@ -711,90 +547,19 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "params loaded: (240, 2)\n",
-      "images rescaled to [-1.0, 1.186443567276001]\n",
-      "params rescaled to [0.0, 0.9999922179553216]\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "75b0459ffb784e1d9e4070b7a424a506",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c7e04c734ba3482eb44344f7a4e37916",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "9a8c702e844f4fbaa295fb8f6d21503b",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "062be80bffee4540b396159acc223e6e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3849ae228e284a1a8c01235ffe2691aa",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b359bb4eeb8b4ec58be692424d352164",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
@@ -803,12 +568,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "06c56d6f2e1443fd87bfa949f092b8f0",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
@@ -817,12 +582,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b5b613300a7046c8a1a62b5237ab5b4e",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
@@ -831,12 +596,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6e76a18a0bce4ee9acd9e8344a81fd65",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
@@ -845,12 +610,12 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3ecf321272464c988e4a291b19d164e0",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
-       "  0%|          | 0/24 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
@@ -860,8 +625,7 @@
    "source": [
     "if __name__ == \"__main__\":\n",
     "    # args = (config, nn_model, ddpm, optimizer, dataloader, lr_scheduler)\n",
-    "    notebook_login()\n",
-    "    repeat = 2\n",
     "    for i in range(repeat):\n",
     "        ddpm21cm = DDPM21CM()\n",
     "        print(f\"run_name = {ddpm21cm.config.run_name}\")\n",

     "    # dim = 2\n",
     "    dim = 2\n",
     "    stride = (2,2) if dim == 2 else (2,2,4)\n",
+    "    num_image = 2560\n",
     "    HII_DIM = 64\n",
     "    num_redshift = 512#256#256#64#512#128\n",
     "    channel = 1\n",
     "    img_shape = (channel, HII_DIM, num_redshift) if dim == 2 else (channel, HII_DIM, HII_DIM, num_redshift)\n",
     "\n",
+    "    n_epoch = 5#2#5#25 # 120\n",
     "    num_timesteps = 1000#1000 # 1000, 500; DDPM time steps\n",
     "    batch_size = 10#20#2#100 # 10\n",
     "    # n_sample = 24 # 64, the number of samples in sampling process\n",
     "    # device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
     "    lrate = 1e-4\n",
     "    lr_warmup_steps = 0#5#00\n",
+    "    output_dir = \"./outputs/\"\n",
+    "    save_name = os.path.join(output_dir, 'model_state.pth')\n",
     "    # save_freq = 1 #10 # the period of saving model\n",
     "    # cond = True # if training using the conditional information\n",
     "    # lr_decay = False #True# if using the learning rate decay\n",
+    "    resume = save_name # if resume from the trained checkpoints\n",
     "    # params_single = torch.tensor([0.2,0.80000023])\n",
     "    # params = torch.tile(params_single,(n_sample,1)).to(device)\n",
     "    # params =  params\n",
     "    # data_dir = './data' # data directory\n",
     "\n",
     "\n",
     "    mixed_precision = \"fp16\"\n",
     "    gradient_accumulation_steps = 1\n",
     "        # initialize the unet\n",
     "        self.nn_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM, dim=config.dim, stride=config.stride)\n",
     "\n",
+    "        if config.resume and os.path.exists(config.resume):\n",
+    "            # resume_file = os.path.join(config.output_dir, f\"{config.resume}\")\n",
+    "            self.nn_model.load_state_dict(torch.load(config.resume)['unet_state_dict'])\n",
     "            print(f\"resumed nn_model from {config.resume}\")\n",
     "        # nn_model = ContextUnet(n_param=1, image_size=28)\n",
     "        self.nn_model.train()\n",
     "        # whether to use ema\n",
     "        if config.ema:\n",
     "            self.ema = EMA(config.ema_rate)\n",
+    "            if config.resume and os.path.exists(config.resume):\n",
     "                self.ema_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM, dim=config.dim, stride=config.stride).to(config.device)\n",
+    "                self.ema_model.load_state_dict(torch.load(config.resume)['ema_unet_state_dict'])\n",
     "                print(f\"resumed ema_model from {config.resume}\")\n",
     "            else:\n",
+    "                self.ema_model = copy.deepcopy(self.nn_model).eval().requires_grad_(False)\n",
     "\n",
     "        self.optimizer = torch.optim.AdamW(self.nn_model.parameters(), lr=config.lrate)\n",
     "        self.lr_scheduler = get_cosine_schedule_with_warmup(\n",
     "                            commit_message = f\"{self.config.run_name}\",\n",
     "                            ignore_patterns = [\"step_*\", \"epoch_*\", \"*.npy\", \"__pycache__\"],\n",
     "                            )\n",
+    "                    if self.config.save_name:\n",
     "                        model_state = {\n",
     "                            'epoch': ep,\n",
     "                            'unet_state_dict': self.nn_model.state_dict(),\n",
     "                            'ema_unet_state_dict': self.ema_model.state_dict(),\n",
     "                            }\n",
+    "                        torch.save(model_state, self.config.save_name)\n",
+    "                        print('saved model at ' + self.config.save_name)\n",
     "                        # print('saved model at ' + config.save_dir + f\"model_epoch_{ep}_test_{config.run_name}.pth\")\n",
     "\n",
     "    def sample(self, file, params:torch.tensor=None, ema=False, entire=False):\n",
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e0f355a0bc8b4592952af6c1ccd5d2fb",
        "version_major": 2,
        "version_minor": 0
       },
      },
      "metadata": {},
      "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "notebook_login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
       "Number of parameters for nn_model: 111048705\n",
+      "run_name = 0523-1704\n",
       "Launching training on one GPU.\n",
       "dataset content: <KeysViewHDF5 ['brightness_temp', 'density', 'kwargs', 'params', 'redshifts_distances', 'seeds', 'xH_box']>\n",
       "51200 images can be loaded\n",
       "field.shape = (64, 64, 514)\n",
       "params keys = [b'ION_Tvir_MIN', b'HII_EFF_FACTOR']\n",
+      "loading 2560 images randomly\n",
+      "images loaded: (2560, 1, 64, 512)\n",
+      "params loaded: (2560, 2)\n"
      ]
     },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "images rescaled to [-1.0, 1.1378462314605713]\n",
+      "params rescaled to [0.0, 0.9995994165819857]\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4d787d2fbdcf4575b7b17a6e5161f5ec",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
+       "  0%|          | 0/256 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e67439e56e594ecfb3967edbfb3f0d60",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
+       "  0%|          | 0/256 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "9ca7cb14960348fa8d83c90d773057ac",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
+       "  0%|          | 0/256 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a6368ae7b9fb4505b6b62d51c5d675ed",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
+       "  0%|          | 0/256 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d5a391c5bbfb4f6481c1f2ad6e754e24",
        "version_major": 2,
        "version_minor": 0
       },
       "text/plain": [
+       "  0%|          | 0/256 [00:00<?, ?it/s]"
       ]
      },
      "metadata": {},
    "source": [
     "if __name__ == \"__main__\":\n",
     "    # args = (config, nn_model, ddpm, optimizer, dataloader, lr_scheduler)\n",
+    "    repeat = 30\n",
     "    for i in range(repeat):\n",
     "        ddpm21cm = DDPM21CM()\n",
     "        print(f\"run_name = {ddpm21cm.config.run_name}\")\n",