InstaDeepAI
/

ChatNT

Text Generation

feature-extraction

Model card · Files and versions

Yanisadel committed on Apr 3, 2025

Commit

d31120f

·

1 Parent(s): d91714f

Update chatNT.py

Files changed (1) hide show

chatNT.py +14 -3

chatNT.py CHANGED Viewed

@@ -1661,14 +1661,24 @@ class TorchMultiModalPerceiverResamplerBlock(nn.Module):
         )
     def mlp(self, x: torch.Tensor) -> torch.Tensor:
         x = self.norm_mlp(x)
         if self.use_glu_in_ffn:
             x1, x2 = torch.chunk(self.fc1(x), 2, dim=-1)
             x = self.activation_fn(x1) * x2
         else:
-            x = self.activation_fn(self.fc1(x))
-        return self.fc2(x)
     def forward(
         self,
         x: torch.Tensor,
@@ -1703,7 +1713,8 @@ class TorchMultiModalPerceiverResamplerBlock(nn.Module):
         outs_news["ATTENTION_layer3_cross_attention_layer_2"] = attn_output.clone()
         x = res + attn_output
-        x = x + self.mlp(x)
         outs_news["ATTENTION_after_mlp"] = x.clone()
         output = {}

         )
     def mlp(self, x: torch.Tensor) -> torch.Tensor:
+        outs = {}
         x = self.norm_mlp(x)
+        outs["MLP_layer0_layer_norm"] = x.clone()
         if self.use_glu_in_ffn:
             x1, x2 = torch.chunk(self.fc1(x), 2, dim=-1)
             x = self.activation_fn(x1) * x2
         else:
+            x = self.fc1(x)
+            outs["MLP_layer1_fc1"] = x.clone()
+            x = self.activation_fn(x)
+            outs["MLP_layer2_activation"] = x.clone()
+        x = self.fc2(x)
+        outs["MLP_layer3_fc2"] = x.clone()
+        outs["x"] = x.clone()
+        return outs
     def forward(
         self,
         x: torch.Tensor,
         outs_news["ATTENTION_layer3_cross_attention_layer_2"] = attn_output.clone()
         x = res + attn_output
+        mlp_output = self.mlp(x)
+        x = x + mlp_output["x"]
         outs_news["ATTENTION_after_mlp"] = x.clone()
         output = {}