InstaDeepAI
/

ChatNT

Text Generation

feature-extraction

Model card Files Files and versions

Yanisadel committed on Apr 1, 2025

Commit

80a78d5

·

1 Parent(s): 8d737ee

Update chatNT.py

Files changed (1) hide show

chatNT.py +3 -0

chatNT.py CHANGED Viewed

@@ -426,6 +426,7 @@ class TorchBioBrainDecoder(nn.Module):
                 )
         # Regular GPT pass through
         embeddings = self.gpt_model.apply_transformer_layers(tokens_embeddings)
         embeddings = self.gpt_model.final_norm(embeddings)
@@ -885,6 +886,7 @@ class TorchGptGroupedQueryAttention(nn.Module):
         value_inputs: torch.Tensor,
         attention_mask: torch.Tensor = None,
     ) -> torch.Tensor:
         batch_size, seq_len, _ = query_inputs.shape
         queries = self.query_linear(query_inputs).view(  # noqa
@@ -966,6 +968,7 @@ class TorchGptDecoder(nn.Module):
         if attention_mask is None:
             attention_mask = build_causal_attention_mask(1, embeddings.shape[1])
         for layer in self.layers:
             embeddings = layer(embeddings, attention_mask)
         return embeddings

                 )
         # Regular GPT pass through
+        print("(debug) tokens embeddings shape : ", tokens_embeddings.shape)
         embeddings = self.gpt_model.apply_transformer_layers(tokens_embeddings)
         embeddings = self.gpt_model.final_norm(embeddings)
         value_inputs: torch.Tensor,
         attention_mask: torch.Tensor = None,
     ) -> torch.Tensor:
+        print("(debug) Query input shape : ", query_inputs.shape)
         batch_size, seq_len, _ = query_inputs.shape
         queries = self.query_linear(query_inputs).view(  # noqa
         if attention_mask is None:
             attention_mask = build_causal_attention_mask(1, embeddings.shape[1])
         for layer in self.layers:
+            print("Embedding shape in apply_transformer_layers : ", embeddings.shape)
             embeddings = layer(embeddings, attention_mask)
         return embeddings