Yash Nagraj committed on
Commit ·
de5e356
1
Parent(s): 38d054a
Add forward function to the midblock
Browse files- models/blocks.py +40 -0
models/blocks.py
CHANGED
|
@@ -157,6 +157,7 @@ class MidBlock(nn.Module):
|
|
| 157 |
self.t_emb_dim = t_emb_dim
|
| 158 |
self.cross_attn = cross_attn
|
| 159 |
self.context_dim = context_dim
|
|
|
|
| 160 |
self.resnet_conv_one = nn.ModuleList([
|
| 161 |
nn.Sequential(
|
| 162 |
nn.GroupNorm(norm_dim, in_channels if i ==
|
|
@@ -218,3 +219,42 @@ class MidBlock(nn.Module):
|
|
| 218 |
for i in range(num_layers + 1)
|
| 219 |
|
| 220 |
])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
self.t_emb_dim = t_emb_dim
|
| 158 |
self.cross_attn = cross_attn
|
| 159 |
self.context_dim = context_dim
|
| 160 |
+
self.num_layers = num_layers
|
| 161 |
self.resnet_conv_one = nn.ModuleList([
|
| 162 |
nn.Sequential(
|
| 163 |
nn.GroupNorm(norm_dim, in_channels if i ==
|
|
|
|
| 219 |
for i in range(num_layers + 1)
|
| 220 |
|
| 221 |
])
|
| 222 |
+
|
| 223 |
+
def forward(self, x, t_emb=None, context=None):
    """Run the mid-block: resnet -> [self-attn -> (cross-attn) -> resnet] x num_layers.

    Args:
        x: input feature map of shape (batch, channels, h, w).
        t_emb: optional time embedding; required when ``self.t_emb_dim`` is set.
        context: conditioning sequence of shape (batch, seq_len, context_dim);
            required when ``self.cross_attn`` is enabled.

    Returns:
        Feature map of shape (batch, channels, h, w) after the final resnet block.
    """
    out = x

    # First resnet block (conv -> optional time embedding -> conv -> skip).
    resnet_input = out
    out = self.resnet_conv_one[0](out)
    if self.t_emb_dim is not None:
        out = out + self.time_emb_layers[0](t_emb)[:, :, None, None]
    out = self.resnet_conv_two[0](out)
    out = out + self.residual_input_conv[0](resnet_input)

    for i in range(self.num_layers):
        # Self-attention over spatial positions: flatten (h, w) into a sequence.
        batch_size, channels, h, w = out.shape
        in_attn = out.reshape(batch_size, channels, h * w)
        in_attn = self.attention_norms[i](in_attn)
        in_attn = in_attn.transpose(1, 2)
        out_attn, _ = self.attention_heads[i](in_attn, in_attn, in_attn)
        # FIX: transpose back to (batch, channels, h*w) before reshaping;
        # reshaping (batch, h*w, channels) directly scrambles the feature map
        # (the cross-attn branch below already does this correctly).
        out_attn = out_attn.transpose(1, 2).reshape(batch_size, channels, h, w)
        out = out + out_attn

        if self.cross_attn:
            assert context is not None, "Context needed when using cross attn"
            batch_size, channels, h, w = out.shape
            in_attn = out.reshape(batch_size, channels, h * w)
            in_attn = self.cross_attn_norms[i](in_attn)
            in_attn = in_attn.transpose(1, 2)
            assert context.shape[0] == x.shape[0] and context.shape[-1] == self.context_dim
            context_proj = self.context_proj[i](context)
            # NOTE(review): __init__ assigns the boolean flag to
            # ``self.cross_attn``, yet it is indexed here as a ModuleList —
            # confirm the cross-attention modules' attribute name; as written
            # this would fail when cross attention is enabled.
            out_attn, _ = self.cross_attn[i](
                in_attn, context_proj, context_proj)
            out_attn = out_attn.transpose(1, 2).reshape(
                batch_size, channels, h, w)
            out = out + out_attn

        # Following resnet block, mirroring the first one.
        resnet_input = out
        out = self.resnet_conv_one[i + 1](out)
        if self.t_emb_dim is not None:
            out = out + self.time_emb_layers[i + 1](t_emb)[:, :, None, None]
        # FIX: apply the second conv directly (was ``out = out + conv(out)``,
        # inconsistent with the first resnet block and double-counting ``out``).
        out = self.resnet_conv_two[i + 1](out)
        out = out + self.residual_input_conv[i + 1](resnet_input)

    # FIX: the committed version fell off the end and implicitly returned None.
    return out
|