Xsmos committed on
Commit 443fbc2 · verified · 1 Parent(s): 58b2929

0521-2013

Files changed (1)
  1. diffusion.ipynb +105 -88

diffusion.ipynb CHANGED
@@ -32,7 +32,7 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "c9806d87a1f2404fb462189f2912d675",
+ "model_id": "8be92a01e78a47b792d93b35d557885d",
  "version_major": 2,
  "version_minor": 0
  },
@@ -234,18 +234,19 @@
  },
  {
  "cell_type": "code",
- "execution_count": 90,
+ "execution_count": 7,
  "metadata": {},
  "outputs": [],
  "source": [
  "class DDPMScheduler(nn.Module):\n",
- " def __init__(self, betas: tuple, num_timesteps: int, device='cpu'):\n",
+ " def __init__(self, betas: tuple, num_timesteps: int, img_shape: list, device='cpu'):\n",
  " super().__init__()\n",
  " \n",
  " beta_1, beta_T = betas\n",
  " assert 0 < beta_1 <= beta_T <= 1, \"ensure 0 < beta_1 <= beta_T <= 1\"\n",
  " self.device = device\n",
  " self.num_timesteps = num_timesteps\n",
+ " self.img_shape = img_shape\n",
  " self.beta_t = torch.linspace(beta_1, beta_T, self.num_timesteps) #* (beta_T-beta_1) + beta_1\n",
  " self.beta_t = self.beta_t.to(self.device)\n",
  "\n",
@@ -277,7 +278,7 @@
  " def sample(self, nn_model, params, device, guide_w = 0):\n",
  " n_sample = len(params) #params.shape[0]\n",
  " # print(\"params.shape[0], len(params)\", params.shape[0], len(params))\n",
- " x_i = torch.randn(n_sample, *self.shape[1:]).to(device)\n",
+ " x_i = torch.randn(n_sample, *self.img_shape[1:]).to(device)\n",
  " # print(\"x_i.shape =\", x_i.shape)\n",
  " if guide_w != -1:\n",
  " c_i = params\n",
@@ -297,7 +298,7 @@
  " t_is = torch.tensor([i]).to(device)\n",
  " t_is = t_is.repeat(n_sample)\n",
  "\n",
- " z = torch.randn(n_sample, *self.shape[1:]).to(device) if i > 0 else 0\n",
+ " z = torch.randn(n_sample, *self.img_shape[1:]).to(device) if i > 0 else 0\n",
  "\n",
  " if guide_w == -1:\n",
  " # eps = nn_model(x_i, t_is, return_dict=False)[0]\n",
@@ -305,7 +306,7 @@
  " # x_i = 1/torch.sqrt(self.alpha_t[i])*(x_i-eps*self.beta_t[i]/torch.sqrt(1-self.bar_alpha_t[i])) + torch.sqrt(self.beta_t[i])*z\n",
  " else:\n",
  " # double batch\n",
- " x_i = x_i.repeat(2, *torch.ones(len(self.shape[1:]), dtype=int).tolist())\n",
+ " x_i = x_i.repeat(2, *torch.ones(len(self.img_shape[1:]), dtype=int).tolist())\n",
  " t_is = t_is.repeat(2)\n",
  "\n",
  " # split predictions and compute weighting\n",
@@ -338,7 +339,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 91,
+ "execution_count": 8,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -380,23 +381,23 @@
  },
  {
  "cell_type": "code",
- "execution_count": 92,
+ "execution_count": 9,
  "metadata": {},
  "outputs": [],
  "source": [
  "class Downsample(nn.Module):\n",
- " def __init__(self, channels, use_conv, out_channels=None):\n",
+ " def __init__(self, channels, use_conv, out_channels=None, dim=2, stride=(2,2)):\n",
  " super().__init__()\n",
  " self.channels = channels\n",
  " self.out_channels = out_channels or channels\n",
- " stride = config.stride\n",
+ " # stride = config.stride\n",
  " if use_conv:\n",
  " # print(\"conv\")\n",
- " self.op = Conv[config.dim](channels, self.out_channels, 3, stride=stride, padding=1)\n",
+ " self.op = Conv[dim](channels, self.out_channels, 3, stride=stride, padding=1)\n",
  " else:\n",
  " # print(\"pool\")\n",
  " assert channels == self.out_channels\n",
- " self.op = AvgPool[config.dim](kernel_size=stride, stride=stride)\n",
+ " self.op = AvgPool[dim](kernel_size=stride, stride=stride)\n",
  "\n",
  " def forward(self, x):\n",
  " assert x.shape[1] == self.channels\n",
@@ -405,25 +406,26 @@
  },
  {
  "cell_type": "code",
- "execution_count": 93,
+ "execution_count": 10,
  "metadata": {},
  "outputs": [],
  "source": [
  "class Upsample(nn.Module):\n",
- " def __init__(self, channels, use_conv, out_channels=None):\n",
+ " def __init__(self, channels, use_conv, out_channels=None, dim=2, stride=(2,2)):\n",
  " super().__init__()\n",
  " self.channels = channels\n",
  " self.out_channels = out_channels\n",
  " self.use_conv = use_conv\n",
+ " self.stride = stride\n",
  " if self.use_conv:\n",
- " self.conv = Conv[config.dim](self.channels, self.out_channels, 3, padding=1)\n",
+ " self.conv = Conv[dim](self.channels, self.out_channels, 3, padding=1)\n",
  "\n",
  " def forward(self, x):\n",
  " assert x.shape[1] == self.channels\n",
- " stride = config.stride\n",
+ " # stride = config.stride\n",
  " # print(torch.tensor(x.shape[2:]))\n",
  " # print(torch.tensor(stride))\n",
- " shape = torch.tensor(x.shape[2:]) * torch.tensor(stride)\n",
+ " shape = torch.tensor(x.shape[2:]) * torch.tensor(self.stride)\n",
  " shape = tuple(shape.detach().numpy())\n",
  " # print(shape)\n",
  " x = F.interpolate(x, shape, mode='nearest')\n",
@@ -434,7 +436,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 94,
+ "execution_count": 11,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -449,7 +451,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 95,
+ "execution_count": 12,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -463,7 +465,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 96,
+ "execution_count": 13,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -481,32 +483,33 @@
  },
  {
  "cell_type": "code",
- "execution_count": 97,
+ "execution_count": 14,
  "metadata": {},
  "outputs": [],
  "source": [
  "class ResBlock(TimestepBlock):\n",
  " def __init__(\n",
- " self, channels, emb_channels, dropout, out_channels=None, use_conv=False, use_checkpoint=False, use_scale_shift_norm=False, up=False, down=False,\n",
+ " self, channels, emb_channels, dropout, out_channels=None, use_conv=False, use_checkpoint=False, use_scale_shift_norm=False, up=False, down=False, dim=2, stride=(2,2),\n",
  " ):\n",
  " super().__init__()\n",
  " self.out_channels = out_channels or channels\n",
  " self.use_scale_shift_norm = use_scale_shift_norm\n",
+ " self.stride = stride\n",
  "\n",
  " self.in_layers = nn.Sequential(\n",
  " # nn.BatchNorm2d(channels), # normalize to standard gaussian\n",
  " normalization(channels, swish=1.0),\n",
  " nn.Identity(),\n",
- " Conv[config.dim](channels, self.out_channels, 3, padding=1),\n",
+ " Conv[dim](channels, self.out_channels, 3, padding=1),\n",
  " )\n",
  "\n",
  " self.updown = up or down\n",
  " if up:\n",
- " self.h_updown = Upsample(channels, False)\n",
- " self.x_updown = Upsample(channels, False)\n",
+ " self.h_updown = Upsample(channels, False, dim=dim, stride=stride)\n",
+ " self.x_updown = Upsample(channels, False, dim=dim, stride=stride)\n",
  " elif down:\n",
- " self.h_updown = Downsample(channels, False)\n",
- " self.x_updown = Downsample(channels, False)\n",
+ " self.h_updown = Downsample(channels, False, dim=dim, stride=stride)\n",
+ " self.x_updown = Downsample(channels, False, dim=dim, stride=stride)\n",
  " else:\n",
  " self.h_updown = self.x_updown = nn.Identity()\n",
  "\n",
@@ -523,15 +526,15 @@
  " normalization(self.out_channels, swish=0.0 if use_scale_shift_norm else 1.0),\n",
  " nn.SiLU() if use_scale_shift_norm else nn.Identity(),\n",
  " nn.Dropout(p=dropout),\n",
- " zero_module(Conv[config.dim](self.out_channels, self.out_channels, 3, padding=1)),\n",
+ " zero_module(Conv[dim](self.out_channels, self.out_channels, 3, padding=1)),\n",
  " )\n",
  "\n",
  " if self.out_channels == channels:\n",
  " self.skip_connection = nn.Identity()\n",
  " elif use_conv:\n",
- " self.skip_connection = Conv[config.dim](channels, self.out_channels, 3, padding=1)\n",
+ " self.skip_connection = Conv[dim](channels, self.out_channels, 3, padding=1)\n",
  " else:\n",
- " self.skip_connection = Conv[config.dim](channels, self.out_channels, 1)\n",
+ " self.skip_connection = Conv[dim](channels, self.out_channels, 1)\n",
  " \n",
  "\n",
  " def forward(self, x, emb):\n",
@@ -562,7 +565,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 98,
+ "execution_count": 15,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -595,7 +598,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 99,
+ "execution_count": 16,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -644,7 +647,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 100,
+ "execution_count": 17,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -673,7 +676,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 101,
+ "execution_count": 18,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -697,6 +700,8 @@
  " resblock_updown = False,\n",
  " conv_resample = True,\n",
  " encoder_channels = None,\n",
+ " dim = 2,\n",
+ " stride = (2,2)\n",
  " ):\n",
  " super().__init__()\n",
  "\n",
@@ -742,7 +747,7 @@
  "\n",
  " ###################### input_blocks ######################\n",
  " self.input_blocks = nn.ModuleList(\n",
- " [TimestepEmbedSequential(Conv[config.dim](in_channels, ch, 3, padding=1))]\n",
+ " [TimestepEmbedSequential(Conv[dim](in_channels, ch, 3, padding=1))]\n",
  " )\n",
  " self._feature_size = ch\n",
  " input_block_chans = [ch]\n",
@@ -758,6 +763,8 @@
  " out_channels = int(mult * model_channels),\n",
  " use_checkpoint = use_checkpoint,\n",
  " use_scale_shift_norm = use_scale_shift_norm,\n",
+ " dim = dim,\n",
+ " stride = stride,\n",
  " )\n",
  " ]\n",
  " ch = int(mult * model_channels)\n",
@@ -788,9 +795,11 @@
  " use_checkpoint=use_checkpoint,\n",
  " use_scale_shift_norm=use_scale_shift_norm,\n",
  " down=True,\n",
+ " dim = dim,\n",
+ " stride = stride,\n",
  " )\n",
  " if resblock_updown\n",
- " else Downsample(ch, conv_resample, out_channels=out_ch)\n",
+ " else Downsample(ch, conv_resample, out_channels=out_ch, dim=dim, stride=stride)\n",
  " )\n",
  " )\n",
  " ch = out_ch\n",
@@ -807,6 +816,8 @@
  " dropout,\n",
  " use_checkpoint=use_checkpoint,\n",
  " use_scale_shift_norm=use_scale_shift_norm,\n",
+ " dim = dim,\n",
+ " stride = stride,\n",
  " ),\n",
  " AttentionBlock(\n",
  " ch,\n",
@@ -821,6 +832,8 @@
  " dropout,\n",
  " use_checkpoint=use_checkpoint,\n",
  " use_scale_shift_norm=use_scale_shift_norm,\n",
+ " dim = dim,\n",
+ " stride = stride,\n",
  " ),\n",
  " )\n",
  " self._feature_size += ch\n",
@@ -840,6 +853,8 @@
  " # dims=dims,\n",
  " use_checkpoint=use_checkpoint,\n",
  " use_scale_shift_norm=use_scale_shift_norm,\n",
+ " dim = dim,\n",
+ " stride = stride,\n",
  " )\n",
  " ]\n",
  " ch = int(model_channels * mult)\n",
@@ -866,9 +881,11 @@
  " use_checkpoint=use_checkpoint,\n",
  " use_scale_shift_norm=use_scale_shift_norm,\n",
  " up=True,\n",
+ " dim = dim,\n",
+ " stride = stride,\n",
  " )\n",
  " if resblock_updown\n",
- " else Upsample(ch, conv_resample, out_channels=out_ch)\n",
+ " else Upsample(ch, conv_resample, out_channels=out_ch, dim=dim, stride=stride)\n",
  " )\n",
  " ds //= 2\n",
  " self.output_blocks.append(TimestepEmbedSequential(*layers))\n",
@@ -878,7 +895,7 @@
  " # nn.BatchNorm2d(ch),\n",
  " normalization(ch, swish=1.0),\n",
  " nn.Identity(),\n",
- " zero_module(Conv[config.dim](input_ch, out_channels, 3, padding=1)),\n",
+ " zero_module(Conv[dim](input_ch, out_channels, 3, padding=1)),\n",
  " )\n",
  " # self.use_fp16 = use_fp16\n",
  "\n",
@@ -915,7 +932,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 102,
+ "execution_count": 19,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -945,7 +962,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 123,
+ "execution_count": 20,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -963,7 +980,7 @@
  " # dim = 2\n",
  " dim = 2\n",
  " stride = (2,2) if dim == 2 else (2,2,4)\n",
- " num_image = 20 # 2400\n",
+ " num_image = 200 # 2400\n",
  " HII_DIM = 64\n",
  " num_redshift = 512#256#256#64#512#128\n",
  " img_shape = (HII_DIM, num_redshift) if dim == 2 else (HII_DIM, HII_DIM, num_redshift)\n",
@@ -1010,7 +1027,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 124,
+ "execution_count": 21,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -1020,7 +1037,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 125,
+ "execution_count": 22,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -1029,7 +1046,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 126,
+ "execution_count": 23,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -1053,7 +1070,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 127,
+ "execution_count": 24,
  "metadata": {},
  "outputs": [],
  "source": [
@@ -1251,7 +1268,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 128,
+ "execution_count": 25,
  "metadata": {},
  "outputs": [
  {
@@ -1262,11 +1279,11 @@
  "51200 images can be loaded\n",
  "field.shape = (64, 64, 514)\n",
  "params keys = [b'ION_Tvir_MIN', b'HII_EFF_FACTOR']\n",
- "loading 20 images randomly\n",
- "images loaded: (20, 1, 64, 512)\n",
- "params loaded: (20, 2)\n",
- "images rescaled to [-1.0, 1.038496732711792]\n",
- "params rescaled to [0.0, 0.9816321951033768]\n",
+ "loading 200 images randomly\n",
+ "images loaded: (200, 1, 64, 512)\n",
+ "params loaded: (200, 2)\n",
+ "images rescaled to [-1.0, 1.082756519317627]\n",
+ "params rescaled to [0.0, 0.9938162632551855]\n",
  "resumed nn_model from model_state.pth\n",
  "Number of parameters for nn_model: 111048705\n"
  ]
@@ -1298,10 +1315,10 @@
  " self.dataloader = DataLoader(dataset, batch_size=config.train_batch_size, shuffle=True)\n",
  " del dataset\n",
  "\n",
- " self.ddpm = DDPMScheduler(betas=(1e-4, 0.02), num_timesteps=config.num_timesteps, device=config.device)\n",
+ " self.ddpm = DDPMScheduler(betas=(1e-4, 0.02), num_timesteps=config.num_timesteps, img_shape=config.img_shape, device=config.device)\n",
  "\n",
  " # initialize the unet\n",
- " self.nn_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM)\n",
+ " self.nn_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM, dim=config.dim, stride=config.stride)\n",
  "\n",
  " if config.resume:\n",
  " self.nn_model.load_state_dict(torch.load(os.path.join(config.output_dir, f\"{config.resume}\"))['unet_state_dict'])\n",
@@ -1318,7 +1335,7 @@
  " if config.ema:\n",
  " self.ema = EMA(config.ema_rate)\n",
  " if config.resume:\n",
- " self.ema_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM).to(config.device)\n",
+ " self.ema_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM, dim=config.dim, stride=config.stride).to(config.device)\n",
  " self.ema_model.load_state_dict(torch.load(os.path.join(config.output_dir, f\"{config.resume}\"))['ema_unet_state_dict'])\n",
  " print(f\"resumed ema_model from {config.resume}\")\n",
  " else:\n",
@@ -1442,18 +1459,18 @@
  },
  {
  "cell_type": "code",
- "execution_count": 129,
+ "execution_count": 26,
  "metadata": {},
  "outputs": [
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "067df92056c8456aa796e3416bac122a",
+ "model_id": "7a0b627f28ef409f8504113bc3af36e3",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1462,12 +1479,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "8e77d9787ee049e5896b1be75d34bf05",
+ "model_id": "62f09cd440a84841b336ab15e76e2fe6",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1476,12 +1493,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "8211e85e22354d7da06f66786ff33d4a",
+ "model_id": "9db24e29de0c47328f1aba68db61bbae",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1490,12 +1507,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "31d068ad21c642468bb2a90c7af57c83",
+ "model_id": "ee59d1a664d04a2b90a7a448a816ed10",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1504,12 +1521,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "2ca6304e757f4c8696bacfc36692e791",
+ "model_id": "8690c736f7eb4a23925b450c05659575",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1518,12 +1535,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "7cc536030a784596995ec5130b7638c5",
+ "model_id": "7dc014a33bfd43408e0aafc208bb403e",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1532,12 +1549,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "b415a15a942046f08e3e2c92404c14ad",
+ "model_id": "6715e5cccc6d480397f76bcea34f94e5",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1546,12 +1563,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "2de1a814b7d34998b63eec43c1d43c12",
+ "model_id": "b7410efd4a5d4efdb9b8be38ba1c2fcb",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1560,12 +1577,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "2ae161b79b0d4e688b12432455a6c065",
+ "model_id": "3b6c0478c9ff4a99b7f79ba4422dbd7d",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1574,12 +1591,12 @@
  {
  "data": {
  "application/vnd.jupyter.widget-view+json": {
- "model_id": "7497a93eb57a40e281141126947f78ae",
+ "model_id": "d26f49f5a9804d84b6b6a531a56eb03a",
  "version_major": 2,
  "version_minor": 0
  },
  "text/plain": [
- " 0%| | 0/2 [00:00<?, ?it/s]"
+ " 0%| | 0/20 [00:00<?, ?it/s]"
  ]
  },
  "metadata": {},
@@ -1592,7 +1609,7 @@
  },
  {
  "cell_type": "code",
- "execution_count": 116,
+ "execution_count": null,
  "metadata": {},
  "outputs": [
  {
@@ -1804,21 +1821,21 @@
  }
  ],
  "source": [
- "ddpm = DDPMScheduler(betas=(1e-4, 0.02), num_timesteps=config.num_timesteps, device=config.device)\n",
+ "# ddpm = DDPMScheduler(betas=(1e-4, 0.02), num_timesteps=config.num_timesteps, device=config.device)\n",
  "\n",
- "nn_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM)\n",
- "print(\"resuming nn_model\")\n",
- "nn_model.load_state_dict(torch.load(os.path.join(config.output_dir, f\"model_state.pth\"))['ema_unet_state_dict'])\n",
- "# nn_model = ContextUnet(n_param=1, image_size=28)\n",
- "# nn_model.train()\n",
- "nn_model.to(ddpm.device)\n",
- "nn_model.eval()\n",
+ "# nn_model = ContextUnet(n_param=config.n_param, image_size=config.HII_DIM)\n",
+ "# print(\"resuming nn_model\")\n",
+ "# nn_model.load_state_dict(torch.load(os.path.join(config.output_dir, f\"model_state.pth\"))['ema_unet_state_dict'])\n",
+ "# # nn_model = ContextUnet(n_param=1, image_size=28)\n",
+ "# # nn_model.train()\n",
+ "# nn_model.to(ddpm.device)\n",
+ "# nn_model.eval()\n",
  "\n",
- "n_sample = 20\n",
- "with torch.no_grad():\n",
- " x_last_ema, x_ema_entire = ddpm.sample(nn_model, n_sample, (1,config.HII_DIM, config.num_redshift), config.device, params = torch.tile(config.params_single,(n_sample,1)).to(config.device), guide_w=config.guide_w)\n",
+ "# n_sample = 20\n",
+ "# with torch.no_grad():\n",
+ "# x_last_ema, x_ema_entire = ddpm.sample(nn_model, n_sample, (1,config.HII_DIM, config.num_redshift), config.device, params = torch.tile(config.params_single,(n_sample,1)).to(config.device), guide_w=config.guide_w)\n",
  "\n",
- "np.save(os.path.join(config.output_dir, f\"{config.run_name}_ema.npy\"), x_last_ema)"
+ "# np.save(os.path.join(config.output_dir, f\"{config.run_name}_ema.npy\"), x_last_ema)"
  ]
  },
  {
 
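Taken together, this commit replaces the implicit `config.dim`/`config.stride` globals in `Downsample`, `Upsample`, `ResBlock`, and `ContextUnet` with explicit `dim` and `stride` arguments, and gives `DDPMScheduler` an explicit `img_shape` in place of the previously undefined `self.shape`. Below is a minimal sketch of how the new signatures fit together; the `Conv`/`AvgPool` dispatch tables are assumptions inferred from the `Conv[dim](...)` indexing in the diff, and the concrete shapes come from the notebook's config cell.

# Sketch under stated assumptions; not the notebook's verbatim code.
import torch
import torch.nn as nn

# Assumed dispatch tables, inferred from the Conv[dim](...) / AvgPool[dim](...) calls.
Conv = {2: nn.Conv2d, 3: nn.Conv3d}
AvgPool = {2: nn.AvgPool2d, 3: nn.AvgPool3d}

dim = 2
stride = (2, 2) if dim == 2 else (2, 2, 4)   # as in the config cell
img_shape = (1, 64, 512)                     # (channels, HII_DIM, num_redshift)

# DDPMScheduler now stores img_shape and draws noise from it, e.g.:
n_sample = 4
x_i = torch.randn(n_sample, *img_shape[1:])  # replaces the old self.shape[1:]

# Downsample's two branches, with dim/stride passed in rather than read from config:
conv_down = Conv[dim](8, 8, 3, stride=stride, padding=1)     # use_conv=True branch
pool_down = AvgPool[dim](kernel_size=stride, stride=stride)  # pooling branch

Making the dimensionality explicit is what lets the same blocks serve both the 2D `(64, 512)` lightcones and the 3D `(64, 64, 512)` variant in the config cell without depending on a module-level `config`.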