Yash Nagraj committed on
Commit
35839a1
·
1 Parent(s): 2c4de69

Add train files

Browse files
Files changed (4) hide show
  1. model.ipynb +490 -0
  2. models.py +151 -0
  3. train.py +63 -0
  4. utils.py +96 -0
model.ipynb ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 20,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import torch\n",
10
+ "import torch.nn as nn\n",
11
+ "import numpy as np\n",
12
+ "from torchvision.utils import save_image, make_grid\n",
13
+ "import matplotlib.pyplot as plt\n",
14
+ "from matplotlib.animation import FuncAnimation, PillowWriter\n",
15
+ "import os\n",
16
+ "import torchvision.transforms as transforms\n",
17
+ "from torch.utils.data import Dataset\n",
18
+ "from PIL import Image\n",
19
+ "from torch.utils.data import DataLoader\n",
20
+ "from tqdm.auto import tqdm\n",
21
+ "import torch.nn.functional as F"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 3,
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "class ResidualBlock(nn.Module):\n",
31
+ " def __init__(self, in_channels: int, out_channels: int,is_res: bool = False) -> None:\n",
32
+ " super(ResidualBlock,self).__init__()\n",
33
+ "\n",
34
+ " self.same_channesls = in_channels == out_channels\n",
35
+ "\n",
36
+ " self.is_res = is_res\n",
37
+ "\n",
38
+ " self.conv1 = nn.Sequential(\n",
39
+ " nn.Conv2d(in_channels,out_channels,3,1,1),\n",
40
+ " nn.BatchNorm2d(out_channels),\n",
41
+ " nn.GELU(),\n",
42
+ " )\n",
43
+ "\n",
44
+ " self.conv2 = nn.Sequential(\n",
45
+ " nn.Conv2d(out_channels,out_channels,3,1,1),\n",
46
+ " nn.BatchNorm2d(out_channels),\n",
47
+ " nn.GELU(),\n",
48
+ " )\n",
49
+ "\n",
50
+ " def forward(self,x): \n",
51
+ " if self.is_res:\n",
52
+ " x1 = self.conv1(x)\n",
53
+ "\n",
54
+ " x2 = self.conv2(x1)\n",
55
+ "\n",
56
+ " if self.same_channesls:\n",
57
+ " out = x1 + x2\n",
58
+ " else:\n",
59
+ " shortcut = nn.Conv2d(x.shape[1],x2.shape[1],1,1,0).to(x.device)\n",
60
+ " out = shortcut(x) + x2\n",
61
+ "\n",
62
+ " return out / 1.414\n",
63
+ " \n",
64
+ " else:\n",
65
+ " x1 = self.conv1(x)\n",
66
+ " x2 = self.conv2(x1)\n",
67
+ " return x2\n"
68
+ ]
69
+ },
70
+ {
71
+ "cell_type": "code",
72
+ "execution_count": 4,
73
+ "metadata": {},
74
+ "outputs": [],
75
+ "source": [
76
+ "class UnetUp(nn.Module):\n",
77
+ " def __init__(self, in_channels, out_channels) -> None:\n",
78
+ " super(UnetUp,self).__init__()\n",
79
+ "\n",
80
+ " self.model = nn.Sequential(\n",
81
+ " nn.ConvTranspose2d(in_channels,out_channels,2,2),\n",
82
+ " ResidualBlock(out_channels,out_channels),\n",
83
+ " ResidualBlock(out_channels,out_channels),\n",
84
+ " )\n",
85
+ "\n",
86
+ " def forward(self, x, skip):\n",
87
+ " x = torch.cat([x,skip],1)\n",
88
+ "\n",
89
+ " x = self.model(x)\n",
90
+ " return x\n",
91
+ " \n",
92
+ "class UnetDown(nn.Module):\n",
93
+ " def __init__(self, input_channels, out_channels) -> None:\n",
94
+ " super(UnetDown,self).__init__()\n",
95
+ "\n",
96
+ " self.model = nn.Sequential(\n",
97
+ " ResidualBlock(input_channels,out_channels),\n",
98
+ " ResidualBlock(out_channels,out_channels),\n",
99
+ " nn.MaxPool2d(2)\n",
100
+ " )\n",
101
+ "\n",
102
+ " def forward(self,x):\n",
103
+ " return self.model(x)\n",
104
+ " \n",
105
+ "\n",
106
+ "class EmbedFC(nn.Module):\n",
107
+ " def __init__(self, input_dim,embed_dm) -> None:\n",
108
+ " super(EmbedFC,self).__init__()\n",
109
+ "\n",
110
+ " self.input_dim = input_dim\n",
111
+ " \n",
112
+ " self.model = nn.Sequential(\n",
113
+ " nn.Linear(input_dim,embed_dm),\n",
114
+ " nn.GELU(),\n",
115
+ " nn.Linear(embed_dm,embed_dm),\n",
116
+ " )\n",
117
+ "\n",
118
+ " def forward(self,x):\n",
119
+ " x = x.view(-1,self.input_dim)\n",
120
+ " return self.model(x)\n"
121
+ ]
122
+ },
123
+ {
124
+ "cell_type": "code",
125
+ "execution_count": 5,
126
+ "metadata": {},
127
+ "outputs": [],
128
+ "source": [
129
+ "def unorm(x):\n",
130
+ " # unity norm. results in range of [0,1]\n",
131
+ " # assume x (h,w,3)\n",
132
+ " xmax = x.max((0,1))\n",
133
+ " xmin = x.min((0,1))\n",
134
+ " return(x - xmin)/(xmax - xmin)\n",
135
+ "\n",
136
+ "def norm_all(store, n_t, n_s):\n",
137
+ " # runs unity norm on all timesteps of all samples\n",
138
+ " nstore = np.zeros_like(store)\n",
139
+ " for t in range(n_t):\n",
140
+ " for s in range(n_s):\n",
141
+ " nstore[t,s] = unorm(store[t,s])\n",
142
+ " return nstore\n",
143
+ "\n",
144
+ "def norm_torch(x_all):\n",
145
+ " # runs unity norm on all timesteps of all samples\n",
146
+ " # input is (n_samples, 3,h,w), the torch image format\n",
147
+ " x = x_all.cpu().numpy()\n",
148
+ " xmax = x.max((2,3))\n",
149
+ " xmin = x.min((2,3))\n",
150
+ " xmax = np.expand_dims(xmax,(2,3)) \n",
151
+ " xmin = np.expand_dims(xmin,(2,3))\n",
152
+ " nstore = (x - xmin)/(xmax - xmin)\n",
153
+ " return torch.from_numpy(nstore)\n"
154
+ ]
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 6,
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": [
162
+ "def plot_grid(x,n_sample,n_rows,save_dir,w):\n",
163
+ " # x:(n_sample, 3, h, w)\n",
164
+ " ncols = n_sample//n_rows\n",
165
+ " grid = make_grid(norm_torch(x), nrow=ncols) # curiously, nrow is number of columns.. or number of items in the row.\n",
166
+ " save_image(grid, save_dir + f\"run_image_w{w}.png\")\n",
167
+ " print('saved image at ' + save_dir + f\"run_image_w{w}.png\")\n",
168
+ " return grid\n",
169
+ "\n",
170
+ "def plot_sample(x_gen_store,n_sample,nrows,save_dir, fn, w, save=False):\n",
171
+ " ncols = n_sample//nrows\n",
172
+ " sx_gen_store = np.moveaxis(x_gen_store,2,4) \n",
173
+ " nsx_gen_store = norm_all(sx_gen_store, sx_gen_store.shape[0], n_sample) \n",
174
+ " fig, axs = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True,figsize=(ncols,nrows))\n",
175
+ " def animate_diff(i, store):\n",
176
+ " print(f'gif animating frame {i} of {store.shape[0]}', end='\\r')\n",
177
+ " plots = []\n",
178
+ " for row in range(nrows):\n",
179
+ " for col in range(ncols):\n",
180
+ " axs[row, col].clear()\n",
181
+ " axs[row, col].set_xticks([])\n",
182
+ " axs[row, col].set_yticks([])\n",
183
+ " plots.append(axs[row, col].imshow(store[i,(row*ncols)+col]))\n",
184
+ " return plots\n",
185
+ " ani = FuncAnimation(fig, animate_diff, fargs=[nsx_gen_store], interval=200, blit=False, repeat=True, frames=nsx_gen_store.shape[0]) \n",
186
+ " plt.close()\n",
187
+ " if save:\n",
188
+ " ani.save(save_dir + f\"{fn}_w{w}.gif\", dpi=100, writer=PillowWriter(fps=5))\n",
189
+ " print('saved gif at ' + save_dir + f\"{fn}_w{w}.gif\")\n",
190
+ " return ani\n"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "code",
195
+ "execution_count": 7,
196
+ "metadata": {},
197
+ "outputs": [],
198
+ "source": [
199
+ "transform = transforms.Compose([\n",
200
+ " transforms.ToTensor(), # from [0,255] to range [0.0,1.0]\n",
201
+ " transforms.Normalize((0.5,), (0.5,)) # range [-1,1]\n",
202
+ "\n",
203
+ "])\n",
204
+ "\n",
205
+ "class CustomDataset(Dataset):\n",
206
+ " def __init__(self, sfilename, lfilename, transform, null_context=False):\n",
207
+ " self.sprites = np.load(sfilename)\n",
208
+ " self.slabels = np.load(lfilename)\n",
209
+ " print(f\"sprite shape: {self.sprites.shape}\")\n",
210
+ " print(f\"labels shape: {self.slabels.shape}\")\n",
211
+ " self.transform = transform\n",
212
+ " self.null_context = null_context\n",
213
+ " self.sprites_shape = self.sprites.shape\n",
214
+ " self.slabel_shape = self.slabels.shape\n",
215
+ " \n",
216
+ " def __len__(self):\n",
217
+ " return len(self.sprites)\n",
218
+ " \n",
219
+ " def __getitem__(self, idx):\n",
220
+ " if self.transform:\n",
221
+ " image = self.transform(self.sprites[idx])\n",
222
+ " if self.null_context:\n",
223
+ " label = torch.tensor(0).to(torch.int64)\n",
224
+ " else:\n",
225
+ " label = torch.tensor(self.slabels[idx]).to(torch.int64)\n",
226
+ " return (image, label)\n"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": 8,
232
+ "metadata": {},
233
+ "outputs": [],
234
+ "source": [
235
+ "class ContextUnet(nn.Module):\n",
236
+ " def __init__(self,in_channels, n_feat = 256,n_cfeat = 10, height = 28) -> None:\n",
237
+ " super(ContextUnet,self).__init__()\n",
238
+ "\n",
239
+ " self.in_channels = in_channels\n",
240
+ " self.n_feat = n_feat\n",
241
+ " self.n_cfeat = n_cfeat\n",
242
+ " self.h = height\n",
243
+ "\n",
244
+ " self.init_conv = ResidualBlock(in_channels,n_feat,is_res=True)\n",
245
+ "\n",
246
+ " self.down1 = UnetDown(n_feat,n_feat)\n",
247
+ " self.down2 = UnetDown(n_feat,n_feat * 2)\n",
248
+ "\n",
249
+ " self.to_vec = nn.Sequential(nn.AvgPool2d((4)),nn.GELU())\n",
250
+ "\n",
251
+ " self.timeembed1 = EmbedFC(1, 2 *n_feat)\n",
252
+ " self.timeembed2 = EmbedFC(1,n_feat)\n",
253
+ " self.contextembed1 = EmbedFC(n_cfeat,2 * n_feat)\n",
254
+ " self.contextembed2 = EmbedFC(n_cfeat,n_feat)\n",
255
+ "\n",
256
+ " self.up0 = nn.Sequential(\n",
257
+ " nn.ConvTranspose2d(2 * n_feat,2*n_feat,self.h // 4,self.h // 4),\n",
258
+ " nn.GroupNorm(8, 2*n_feat),\n",
259
+ " nn.ReLU(),\n",
260
+ " )\n",
261
+ "\n",
262
+ " self.up1 = UnetUp(4 * n_feat,n_feat)\n",
263
+ " self.up2 = UnetUp(2 * n_feat,n_feat)\n",
264
+ "\n",
265
+ " self.out = nn.Sequential(\n",
266
+ " nn.Conv2d(2 * n_feat, n_feat,3,1,1),\n",
267
+ " nn.GroupNorm(8,n_feat),\n",
268
+ " nn.ReLU(),\n",
269
+ " nn.Conv2d(n_feat,self.in_channels,3,1,1)\n",
270
+ " )\n",
271
+ "\n",
272
+ " def forward(self,x,t,c=None):\n",
273
+ " x = self.init_conv(x)\n",
274
+ "\n",
275
+ " down1 = self.down1(x)\n",
276
+ " down2 = self.down2(down1)\n",
277
+ "\n",
278
+ " hidden_vec = self.to_vec(down2)\n",
279
+ "\n",
280
+ " if c is None:\n",
281
+ " c = torch.zeros(x.shape[0],self.n_cfeat).to(x)\n",
282
+ " \n",
283
+ " cemb1 = self.contextembed1(c).view(-1,self.n_cfeat*2,1,1)\n",
284
+ " temb1 = self.timeembed1(t).view(-1,self.n_cfeat * 2,1,1)\n",
285
+ " cemb2 = self.contextembed2(c).view(-1,self.n_cfeat,1,1)\n",
286
+ " temb2 = self.timeembed2(t).view(-1,self.n_cfeat,1,1)\n",
287
+ "\n",
288
+ " up0 = self.up0(hidden_vec)\n",
289
+ " up1 =self.up1(up0*cemb1 + temb1,down2)\n",
290
+ " up2 = self.up2(up1*cemb2+temb2,down1)\n",
291
+ "\n",
292
+ " out = self.out(torch.cat((up2,x),1))\n",
293
+ "\n",
294
+ " return out"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": 14,
300
+ "metadata": {},
301
+ "outputs": [],
302
+ "source": [
303
+ "# Hyperparameters\n",
304
+ "\n",
305
+ "timesteps = 500\n",
306
+ "beta1 = 1e-4\n",
307
+ "beta2 = 0.02\n",
308
+ "\n",
309
+ "device = \"cuda\"\n",
310
+ "n_feat = 64\n",
311
+ "n_cfeat = 5\n",
312
+ "height = 16\n",
313
+ "save_dir=\"./checkpoints\"\n",
314
+ "\n",
315
+ "batch_size = 100\n",
316
+ "n_epoch = 40\n",
317
+ "lrate = 1e-3"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 12,
323
+ "metadata": {},
324
+ "outputs": [
325
+ {
326
+ "name": "stdout",
327
+ "output_type": "stream",
328
+ "text": [
329
+ "torch.Size([501])\n",
330
+ "torch.Size([501])\n",
331
+ "torch.Size([501])\n"
332
+ ]
333
+ }
334
+ ],
335
+ "source": [
336
+ "b_t = (beta2 - beta1) * torch.linspace(0,1,timesteps+1,device=device) + beta1\n",
337
+ "a_t = 1 - b_t\n",
338
+ "a_bt = torch.cumsum(a_t.log(),0).exp()\n",
339
+ "a_bt[0] = 1"
340
+ ]
341
+ },
342
+ {
343
+ "cell_type": "code",
344
+ "execution_count": null,
345
+ "metadata": {},
346
+ "outputs": [],
347
+ "source": [
348
+ "dataset = CustomDataset(\"./sprites_1788_16x16.npy\", \"./sprite_labels_nc_1788_16x16.npy\", transform, null_context=False)\n",
349
+ "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)"
350
+ ]
351
+ },
352
+ {
353
+ "cell_type": "code",
354
+ "execution_count": 17,
355
+ "metadata": {},
356
+ "outputs": [],
357
+ "source": [
358
+ "nn_model = ContextUnet(3,n_feat,n_cfeat,height)\n",
359
+ "optim = torch.optim.Adam(nn_model.parameters(),lrate)"
360
+ ]
361
+ },
362
+ {
363
+ "cell_type": "code",
364
+ "execution_count": 16,
365
+ "metadata": {},
366
+ "outputs": [],
367
+ "source": [
368
+ "def perturb_input(x, t, noise):\n",
369
+ " return a_bt.sqrt()[t, None, None, None] * x + (1 - a_bt[t, None, None, None]) * noise"
370
+ ]
371
+ },
372
+ {
373
+ "cell_type": "code",
374
+ "execution_count": null,
375
+ "metadata": {},
376
+ "outputs": [],
377
+ "source": [
378
+ "nn_model.train()\n",
379
+ "\n",
380
+ "for epoch in range(n_epoch):\n",
381
+ "\n",
382
+ " optim.param_groups[0]['lr'] = lrate * (1-epoch/n_epoch)\n",
383
+ " for x,_ in tqdm(dataloader):\n",
384
+ " optim.zero_grad()\n",
385
+ "\n",
386
+ " x = x.to(device)\n",
387
+ "\n",
388
+ " t = torch.randint(1,timesteps+1,x.shape[0]).to(device)\n",
389
+ " noise = torch.randn_like(x)\n",
390
+ " x_pert = perturb_input(x,t,noise)\n",
391
+ "\n",
392
+ " pred = nn_model(x_pert,t / timesteps)\n",
393
+ "\n",
394
+ " loss = F.mse_loss(pred,noise)\n",
395
+ " loss.backward()\n",
396
+ " optim.step()\n",
397
+ "\n",
398
+ " if epoch % 1 == 0 and epoch >0:\n",
399
+ " if not os.path.exists(save_dir):\n",
400
+ " os.mkdir(save_dir)\n",
401
+ " torch.save(nn_model,save_dir + f\"model_Epoch{epoch}.pth\")\n",
402
+ " print(\"Saved model\")\n"
403
+ ]
404
+ },
405
+ {
406
+ "cell_type": "code",
407
+ "execution_count": 22,
408
+ "metadata": {},
409
+ "outputs": [],
410
+ "source": [
411
+ "def denoise_add_noise(x,t,pred_noise,z=None):\n",
412
+ " if z is None:\n",
413
+ " z = torch.randn_like(x)\n",
414
+ " noise = b_t.sqrt()[t]\n",
415
+ " mean = x - (pred_noise * ((1-a_t[t]) / (1-a_bt[t]).sqrt())) / a_t[t].sqrt()\n",
416
+ " return mean + noise\n"
417
+ ]
418
+ },
419
+ {
420
+ "cell_type": "code",
421
+ "execution_count": null,
422
+ "metadata": {},
423
+ "outputs": [],
424
+ "source": [
425
+ "@torch.no_grad()\n",
426
+ "def sample_ddpm(n_sample,save_rate=20):\n",
427
+ " # x_T ~ N(0, 1), sample initial noise\n",
428
+ " samples = torch.randn(n_sample,3,height,height)\n",
429
+ "\n",
430
+ " intermediate = []\n",
431
+ " for i in range(timesteps,0,-1):\n",
432
+ " print(f\"Sampling timestep: {i}\")\n",
433
+ "\n",
434
+ " t = torch.tensor([i/timesteps])[:,None,None,None].to(device)\n",
435
+ "\n",
436
+ " z = torch.randn_like(samples)\n",
437
+ "\n",
438
+ " pred = nn_model(samples,t)\n",
439
+ " samples = denoise_add_noise(samples,t,pred,z)\n",
440
+ " if i % save_rate ==0 or i==timesteps or i<8:\n",
441
+ " intermediate.append(samples.detach().cpu().numpy())\n",
442
+ "\n",
443
+ " intermediate = np.stack(intermediate)\n",
444
+ " return samples,intermediate\n"
445
+ ]
446
+ },
447
+ {
448
+ "cell_type": "code",
449
+ "execution_count": null,
450
+ "metadata": {},
451
+ "outputs": [],
452
+ "source": [
453
+ "model = torch.load(f\"{save_dir}/model_Epoch_35\")\n",
454
+ "model.eval()\n",
455
+ "print(\"Loaded model\")"
456
+ ]
457
+ },
458
+ {
459
+ "cell_type": "code",
460
+ "execution_count": null,
461
+ "metadata": {},
462
+ "outputs": [],
463
+ "source": [
464
+ "plt.clf()\n",
465
+ "samples, intermediate = sample_ddpm(32)\n"
466
+ ]
467
+ }
468
+ ],
469
+ "metadata": {
470
+ "kernelspec": {
471
+ "display_name": "Python 3",
472
+ "language": "python",
473
+ "name": "python3"
474
+ },
475
+ "language_info": {
476
+ "codemirror_mode": {
477
+ "name": "ipython",
478
+ "version": 3
479
+ },
480
+ "file_extension": ".py",
481
+ "mimetype": "text/x-python",
482
+ "name": "python",
483
+ "nbconvert_exporter": "python",
484
+ "pygments_lexer": "ipython3",
485
+ "version": "3.12.3"
486
+ }
487
+ },
488
+ "nbformat": 4,
489
+ "nbformat_minor": 2
490
+ }
models.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ import torch
3
+
4
class ResidualBlock(nn.Module):
    """Two 3x3 conv stages (Conv -> BatchNorm -> GELU) with an optional residual path.

    Args:
        in_channels: channels of the input tensor.
        out_channels: channels of the output tensor.
        is_res: when True, add a residual connection (scaled by 1/sqrt(2)).
    """

    def __init__(self, in_channels: int, out_channels: int, is_res: bool = False) -> None:
        super(ResidualBlock, self).__init__()

        self.same_channels = in_channels == out_channels
        self.is_res = is_res

        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, 3, 1, 1),
            nn.BatchNorm2d(out_channels),
            nn.GELU(),
        )

        # Bug fix: the 1x1 projection used to be constructed inside forward() on
        # every call, so it had fresh random (and untrained) weights each pass.
        # Register it once here so it is a trainable part of the module.
        if is_res and not self.same_channels:
            self.shortcut = nn.Conv2d(in_channels, out_channels, 1, 1, 0)

    def forward(self, x):
        x1 = self.conv1(x)
        x2 = self.conv2(x1)

        if not self.is_res:
            return x2

        if self.same_channels:
            out = x1 + x2
        else:
            out = self.shortcut(x) + x2

        # Scale by 1/sqrt(2) to keep the variance of the sum roughly constant.
        return out / 1.414
42
+
43
+
44
+
45
class UnetUp(nn.Module):
    """Upsampling stage: concatenate the skip connection with the input, then
    apply a 2x transpose-conv followed by two residual blocks."""

    def __init__(self, in_channels, out_channels) -> None:
        super(UnetUp, self).__init__()

        layers = [
            nn.ConvTranspose2d(in_channels, out_channels, 2, 2),
            ResidualBlock(out_channels, out_channels),
            ResidualBlock(out_channels, out_channels),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x, skip):
        # Channel-wise concat of the upsampled path and the skip connection.
        merged = torch.cat([x, skip], 1)
        return self.model(merged)
60
+
61
class UnetDown(nn.Module):
    """Downsampling stage: two residual blocks followed by 2x max-pooling."""

    def __init__(self, input_channels, out_channels) -> None:
        super(UnetDown, self).__init__()

        blocks = [
            ResidualBlock(input_channels, out_channels),
            ResidualBlock(out_channels, out_channels),
            nn.MaxPool2d(2),
        ]
        self.model = nn.Sequential(*blocks)

    def forward(self, x):
        return self.model(x)
73
+
74
+
75
class EmbedFC(nn.Module):
    """Two-layer MLP that maps a flattened input of size `input_dim` to an
    `embed_dm`-dimensional embedding."""

    def __init__(self, input_dim, embed_dm) -> None:
        super(EmbedFC, self).__init__()

        self.input_dim = input_dim

        self.model = nn.Sequential(
            nn.Linear(input_dim, embed_dm),
            nn.GELU(),
            nn.Linear(embed_dm, embed_dm),
        )

    def forward(self, x):
        # Flatten everything but the batch dimension before the MLP.
        flat = x.view(-1, self.input_dim)
        return self.model(flat)
90
+
91
+
92
class ContextUnet(nn.Module):
    """Context-conditioned U-Net that predicts the diffusion noise.

    Args:
        in_channels: channels of the input image (e.g. 3 for RGB).
        n_feat: base feature width of the U-Net.
        n_cfeat: size of the context (label) vector.
        height: input height/width in pixels (assumed square, divisible by 4).
    """

    def __init__(self, in_channels, n_feat=256, n_cfeat=10, height=28) -> None:
        super(ContextUnet, self).__init__()

        self.in_channels = in_channels
        self.n_feat = n_feat
        self.n_cfeat = n_cfeat
        self.h = height

        self.init_conv = ResidualBlock(in_channels, n_feat, is_res=True)

        self.down1 = UnetDown(n_feat, n_feat)
        self.down2 = UnetDown(n_feat, n_feat * 2)

        # Collapse the (h/4 x h/4) feature map to a 1x1 bottleneck vector.
        self.to_vec = nn.Sequential(nn.AvgPool2d((4)), nn.GELU())

        # Time and context embeddings for each decoder level.
        self.timeembed1 = EmbedFC(1, 2 * n_feat)
        self.timeembed2 = EmbedFC(1, n_feat)
        self.contextembed1 = EmbedFC(n_cfeat, 2 * n_feat)
        self.contextembed2 = EmbedFC(n_cfeat, n_feat)

        self.up0 = nn.Sequential(
            nn.ConvTranspose2d(2 * n_feat, 2 * n_feat, self.h // 4, self.h // 4),
            nn.GroupNorm(8, 2 * n_feat),
            nn.ReLU(),
        )

        self.up1 = UnetUp(4 * n_feat, n_feat)
        self.up2 = UnetUp(2 * n_feat, n_feat)

        self.out = nn.Sequential(
            nn.Conv2d(2 * n_feat, n_feat, 3, 1, 1),
            nn.GroupNorm(8, n_feat),
            nn.ReLU(),
            nn.Conv2d(n_feat, self.in_channels, 3, 1, 1),
        )

    def forward(self, x, t, c=None):
        """Predict noise for image batch `x` at normalized timestep `t`,
        optionally conditioned on context vectors `c` of shape (batch, n_cfeat)."""
        x = self.init_conv(x)

        down1 = self.down1(x)
        down2 = self.down2(down1)

        hidden_vec = self.to_vec(down2)

        # Unconditional: use an all-zero context vector.
        if c is None:
            c = torch.zeros(x.shape[0], self.n_cfeat).to(x)

        # Bug fix: the embeddings output 2*n_feat / n_feat features (see
        # EmbedFC sizes above), so the views must use n_feat, not n_cfeat.
        # With n_feat=64, n_cfeat=5 the original view(-1, n_cfeat*2, 1, 1)
        # fails at runtime.
        cemb1 = self.contextembed1(c).view(-1, self.n_feat * 2, 1, 1)
        temb1 = self.timeembed1(t).view(-1, self.n_feat * 2, 1, 1)
        cemb2 = self.contextembed2(c).view(-1, self.n_feat, 1, 1)
        temb2 = self.timeembed2(t).view(-1, self.n_feat, 1, 1)

        up0 = self.up0(hidden_vec)
        # Condition each decoder level multiplicatively (context) and
        # additively (time), as in the DDPM course architecture.
        up1 = self.up1(up0 * cemb1 + temb1, down2)
        up2 = self.up2(up1 * cemb2 + temb2, down1)

        out = self.out(torch.cat((up2, x), 1))

        return out
train.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from utils import *
3
+ from torch.utils.data import DataLoader
4
+ from models import *
5
+ from tqdm.auto import tqdm
6
+
7
# Bug fix: `os` and `F` were used below but never imported anywhere
# (utils does not re-export them), which raised NameError at runtime.
import os

import torch.nn.functional as F

# ---------------------------------------------------------------------------
# Hyperparameters
# ---------------------------------------------------------------------------
timesteps = 500          # number of diffusion steps T
beta1 = 1e-4             # noise-schedule start value
beta2 = 0.02             # noise-schedule end value

# Robustness: fall back to CPU when CUDA is unavailable.
device = "cuda" if torch.cuda.is_available() else "cpu"
n_feat = 64              # base U-Net feature width
n_cfeat = 5              # context (label) vector size
height = 16              # sprite height/width in pixels
save_dir = "./checkpoints"

batch_size = 100
n_epoch = 40
lrate = 1e-3


# Linear beta schedule over timesteps+1 points; a_bt is alpha-bar, the
# cumulative product of (1 - beta) computed stably in log space.
b_t = (beta2 - beta1) * torch.linspace(0, 1, timesteps + 1, device=device) + beta1
a_t = 1 - b_t
a_bt = torch.cumsum(a_t.log(), 0).exp()
a_bt[0] = 1


dataset = CustomDataset("./sprites_1788_16x16.npy", "./sprite_labels_nc_1788_16x16.npy", transform, null_context=False)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=1)


# Bug fix: the model must live on the same device as the batches, which are
# moved to `device` inside the loop.
nn_model = ContextUnet(3, n_feat, n_cfeat, height).to(device)
optim = torch.optim.Adam(nn_model.parameters(), lrate)


def perturb_input(x, t, noise):
    """Forward-diffuse x to timestep t: sqrt(a_bar)*x + sqrt(1 - a_bar)*noise.

    Bug fix: the noise coefficient is sqrt(1 - a_bar); the original used
    (1 - a_bar) without the square root.
    """
    return a_bt.sqrt()[t, None, None, None] * x + (1 - a_bt[t, None, None, None]).sqrt() * noise


nn_model.train()

for epoch in range(n_epoch):

    # Linearly decay the learning rate over training.
    optim.param_groups[0]['lr'] = lrate * (1 - epoch / n_epoch)
    for x, _ in tqdm(dataloader):
        optim.zero_grad()

        x = x.to(device)

        # Bug fix: torch.randint takes a size *tuple*, not a bare int.
        t = torch.randint(1, timesteps + 1, (x.shape[0],), device=device)
        noise = torch.randn_like(x)
        x_pert = perturb_input(x, t, noise)

        # The model is conditioned on the normalized timestep t/T.
        pred = nn_model(x_pert, t / timesteps)

        loss = F.mse_loss(pred, noise)
        loss.backward()
        optim.step()

    if epoch % 1 == 0 and epoch > 0:
        os.makedirs(save_dir, exist_ok=True)
        # Bug fix: join with a separator so checkpoints land *inside*
        # save_dir instead of "./checkpointsmodel_Epoch...".
        torch.save(nn_model, os.path.join(save_dir, f"model_Epoch{epoch}.pth"))
        print("Saved model")
utils.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ from torchvision.utils import save_image, make_grid
4
+ import matplotlib.pyplot as plt
5
+ from matplotlib.animation import FuncAnimation, PillowWriter
6
+ from torchvision import transforms
7
+ from torch.utils.data import Dataset
8
+
9
+
10
+
11
def unorm(x):
    """Unit-normalize an (h, w, 3) array so each channel spans [0, 1]."""
    lo = x.min((0, 1))
    hi = x.max((0, 1))
    return (x - lo) / (hi - lo)
17
+
18
def norm_all(store, n_t, n_s):
    """Apply unorm to every (timestep, sample) image in `store`.

    store: array of shape (n_t, n_s, h, w, 3); returns the same shape.
    """
    normalized = np.zeros_like(store)
    for t_idx in range(n_t):
        for s_idx in range(n_s):
            normalized[t_idx, s_idx] = unorm(store[t_idx, s_idx])
    return normalized
25
+
26
def norm_torch(x_all):
    """Unit-normalize each channel of each sample of a (n, 3, h, w) tensor
    to [0, 1], returning a new torch tensor."""
    arr = x_all.cpu().numpy()
    # Per-sample, per-channel extrema, broadcast back over the spatial dims.
    lo = np.expand_dims(arr.min((2, 3)), (2, 3))
    hi = np.expand_dims(arr.max((2, 3)), (2, 3))
    return torch.from_numpy((arr - lo) / (hi - lo))
36
+
37
+
38
def plot_grid(x, n_sample, n_rows, save_dir, w):
    """Save a grid image of generated samples and return the grid tensor.

    Args:
        x: samples of shape (n_sample, 3, h, w).
        n_sample: number of samples in x.
        n_rows: rows in the output grid; columns = n_sample // n_rows.
        save_dir: directory to write the png into.
        w: guidance-weight tag embedded in the filename.
    """
    import os  # local import: join paths safely regardless of trailing '/'

    ncols = n_sample // n_rows
    grid = make_grid(norm_torch(x), nrow=ncols)  # nrow = items per row, i.e. columns
    # Bug fix: string concatenation dropped the path separator, writing
    # "checkpointsrun_image..." next to the directory instead of inside it.
    path = os.path.join(save_dir, f"run_image_w{w}.png")
    save_image(grid, path)
    print('saved image at ' + path)
    return grid
45
+
46
def plot_sample(x_gen_store, n_sample, nrows, save_dir, fn, w, save=False):
    """Animate intermediate diffusion samples as a grid; optionally save a gif.

    Args:
        x_gen_store: stored frames, shape (n_frames, n_sample, 3, h, w).
        n_sample: samples per frame; columns = n_sample // nrows.
        nrows: rows in the grid.
        save_dir: directory to write the gif into (when save=True).
        fn: filename stem for the gif.
        w: guidance-weight tag embedded in the filename.
        save: when True, render and save the gif with PillowWriter.

    Returns the FuncAnimation object.
    """
    import os  # local import: join paths safely regardless of trailing '/'

    ncols = n_sample // nrows
    # (frames, samples, 3, h, w) -> (frames, samples, h, w, 3) for imshow.
    sx_gen_store = np.moveaxis(x_gen_store, 2, 4)
    nsx_gen_store = norm_all(sx_gen_store, sx_gen_store.shape[0], n_sample)
    fig, axs = plt.subplots(nrows=nrows, ncols=ncols, sharex=True, sharey=True, figsize=(ncols, nrows))

    def animate_diff(i, store):
        # Redraw every cell of the grid for frame i.
        print(f'gif animating frame {i} of {store.shape[0]}', end='\r')
        plots = []
        for row in range(nrows):
            for col in range(ncols):
                axs[row, col].clear()
                axs[row, col].set_xticks([])
                axs[row, col].set_yticks([])
                plots.append(axs[row, col].imshow(store[i, (row * ncols) + col]))
        return plots

    ani = FuncAnimation(fig, animate_diff, fargs=[nsx_gen_store], interval=200, blit=False, repeat=True, frames=nsx_gen_store.shape[0])
    plt.close()
    if save:
        # Bug fix: string concatenation dropped the path separator, saving the
        # gif next to the directory instead of inside it.
        path = os.path.join(save_dir, f"{fn}_w{w}.gif")
        ani.save(path, dpi=100, writer=PillowWriter(fps=5))
        print('saved gif at ' + path)
    return ani
67
+
68
+
69
# Default sprite preprocessing: uint8 [0, 255] -> float [0.0, 1.0] -> [-1, 1].
transform = transforms.Compose(
    [
        transforms.ToTensor(),                 # [0, 255] -> [0.0, 1.0]
        transforms.Normalize((0.5,), (0.5,)),  # [0.0, 1.0] -> [-1, 1]
    ]
)
74
+
75
class CustomDataset(Dataset):
    """Sprite dataset backed by two .npy files (images and labels).

    Args:
        sfilename: path to the sprite images array (N, h, w, 3).
        lfilename: path to the labels array.
        transform: callable applied to each raw sprite (may be None/falsy).
        null_context: when True, every label is the constant 0.
    """

    def __init__(self, sfilename, lfilename, transform, null_context=False):
        self.sprites = np.load(sfilename)
        self.slabels = np.load(lfilename)
        print(f"sprite shape: {self.sprites.shape}")
        print(f"labels shape: {self.slabels.shape}")
        self.transform = transform
        self.null_context = null_context
        # Cached copies of the array shapes for convenience.
        self.sprites_shape = self.sprites.shape
        self.slabel_shape = self.slabels.shape

    def __len__(self):
        return len(self.sprites)

    def __getitem__(self, idx):
        """Return (image, label) for index idx; label is int64."""
        # Bug fix: the original only assigned `image` inside the transform
        # branch, raising UnboundLocalError when transform was falsy. Fall
        # back to the raw sprite in that case.
        image = self.sprites[idx]
        if self.transform:
            image = self.transform(image)
        if self.null_context:
            label = torch.tensor(0).to(torch.int64)
        else:
            label = torch.tensor(self.slabels[idx]).to(torch.int64)
        return (image, label)