Align to Mamba2 checkpoint keys and config

Browse files — modeling_genemamba.py (+3, −3)

modeling_genemamba.py
CHANGED
@@ -219,8 +219,8 @@ class GeneMambaModel(GeneMambaPreTrainedModel):

  219          num_hidden_layers=config.num_hidden_layers
  220      )
  221
  222  -    # Final layer normalization
  223  -    self.  [removed line truncated in this extraction]
  224
  225      self.apply(self._init_weights)
  226
@@ -254,7 +254,7 @@ class GeneMambaModel(GeneMambaPreTrainedModel):

  254      hidden_states = self.mamba_mixer(hidden_states, attention_mask)
  255
  256      # Apply final normalization
  257  -    hidden_states = self.  [removed line truncated in this extraction]
  258
  259      # Compute pooled embedding (cell representation)
  260      if self.config.embedding_pooling == "CLS":
|
| 219 |
num_hidden_layers=config.num_hidden_layers
|
| 220 |
)
|
| 221 |
|
| 222 |
+
# Final layer normalization (kept as norm_f to match checkpoint key names)
|
| 223 |
+
self.norm_f = RMSNorm(config.hidden_size)
|
| 224 |
|
| 225 |
self.apply(self._init_weights)
|
| 226 |
|
|
|
|
  254      hidden_states = self.mamba_mixer(hidden_states, attention_mask)
  255
  256      # Apply final normalization
  257  +    hidden_states = self.norm_f(hidden_states)
  258
  259      # Compute pooled embedding (cell representation)
  260      if self.config.embedding_pooling == "CLS":