Yash Nagraj committed on
Commit ·
6801d5a
1
Parent(s): de5e356
Add all the layers in the UpBlock
Browse files — models/blocks.py +71 -0
models/blocks.py
CHANGED
|
@@ -152,6 +152,7 @@ class MidBlock(nn.Module):
|
|
| 152 |
"""
|
| 153 |
|
| 154 |
def __init__(self, in_channels, out_channels, t_emb_dim, num_heads, num_layers, norm_dim, cross_attn=None, context_dim=None):
|
|
|
|
| 155 |
self.in_channels = in_channels
|
| 156 |
self.out_channels = out_channels
|
| 157 |
self.t_emb_dim = t_emb_dim
|
|
@@ -258,3 +259,73 @@ class MidBlock(nn.Module):
|
|
| 258 |
out = out + self.time_emb_layers[i+1](t_emb)[:, :, None, None]
|
| 259 |
out = out + self.resnet_conv_two[i+1](out)
|
| 260 |
out = out + self.residual_input_conv[i+1](resnet_input)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
"""
|
| 153 |
|
| 154 |
def __init__(self, in_channels, out_channels, t_emb_dim, num_heads, num_layers, norm_dim, cross_attn=None, context_dim=None):
|
| 155 |
+
super().__init__()
|
| 156 |
self.in_channels = in_channels
|
| 157 |
self.out_channels = out_channels
|
| 158 |
self.t_emb_dim = t_emb_dim
|
|
|
|
| 259 |
out = out + self.time_emb_layers[i+1](t_emb)[:, :, None, None]
|
| 260 |
out = out + self.resnet_conv_two[i+1](out)
|
| 261 |
out = out + self.residual_input_conv[i+1](resnet_input)
|
| 262 |
+
|
| 263 |
+
return out
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
class UpBlock(nn.Module):
|
| 267 |
+
"""
|
| 268 |
+
Up Block that upsamples the image, flows like this:
|
| 269 |
+
1) UpSample
|
| 270 |
+
2) Concat down block output
|
| 271 |
+
3) Resnet block with time embedding
|
| 272 |
+
4) Attention Block
|
| 273 |
+
"""
|
| 274 |
+
|
| 275 |
+
def __init__(self, in_channels, out_channels, t_emb_dim, up_sample, num_layers, attn, norm_channels, num_heads):
|
| 276 |
+
super().__init__()
|
| 277 |
+
self.num_layers = num_layers
|
| 278 |
+
self.attn = attn
|
| 279 |
+
self.norm_channels = norm_channels
|
| 280 |
+
self.resnet_conv_one = nn.ModuleList([
|
| 281 |
+
nn.Sequential(
|
| 282 |
+
nn.GroupNorm(norm_channels, in_channels if i ==
|
| 283 |
+
0 else out_channels),
|
| 284 |
+
nn.SiLU(),
|
| 285 |
+
nn.Conv2d(in_channels if i == 0 else out_channels,
|
| 286 |
+
out_channels, 3, 1, 1)
|
| 287 |
+
|
| 288 |
+
) for i in range(num_layers)
|
| 289 |
+
|
| 290 |
+
])
|
| 291 |
+
|
| 292 |
+
if t_emb_dim is not None:
|
| 293 |
+
self.time_emb_layers = nn.ModuleList(
|
| 294 |
+
[
|
| 295 |
+
nn.Sequential(
|
| 296 |
+
nn.SiLU(),
|
| 297 |
+
nn.Linear(t_emb_dim, out_channels)
|
| 298 |
+
) for _ in range(num_layers)
|
| 299 |
+
|
| 300 |
+
]
|
| 301 |
+
)
|
| 302 |
+
|
| 303 |
+
self.resnet_conv_two = nn.ModuleList([
|
| 304 |
+
nn.Sequential(
|
| 305 |
+
nn.GroupNorm(norm_channels, out_channels),
|
| 306 |
+
nn.SiLU(),
|
| 307 |
+
nn.Conv2d(out_channels, out_channels, 3, 1, 1)
|
| 308 |
+
|
| 309 |
+
) for _ in range(num_layers)
|
| 310 |
+
|
| 311 |
+
])
|
| 312 |
+
|
| 313 |
+
if self.attn:
|
| 314 |
+
self.attention_norms = nn.ModuleList([
|
| 315 |
+
nn.GroupNorm(norm_channels, out_channels)
|
| 316 |
+
for _ in range(num_layers)
|
| 317 |
+
])
|
| 318 |
+
|
| 319 |
+
self.attention_heads = nn.ModuleList(
|
| 320 |
+
[nn.MultiheadAttention(
|
| 321 |
+
out_channels, num_heads, batch_first=True) for _ in range(num_layers)]
|
| 322 |
+
)
|
| 323 |
+
|
| 324 |
+
self.resnet_input_conv = nn.ModuleList([
|
| 325 |
+
nn.Conv2d(in_channels if i == 0 else out_channels,
|
| 326 |
+
out_channels, 3, 1, 1)
|
| 327 |
+
for i in range(num_layers)
|
| 328 |
+
])
|
| 329 |
+
|
| 330 |
+
self.upsample = nn.ConvTranspose2d(
|
| 331 |
+
in_channels, in_channels, 4, 2, 1) if self.upsample else nn.Identity()
|