Commit: add an embeddings-extraction function (`get_embeddings`) to the modeling code
Browse files — modeling_nicheformer.py: +66 lines, −0 lines
modeling_nicheformer.py
CHANGED
|
@@ -100,6 +100,54 @@ class NicheformerModel(NicheformerPreTrainedModel):
|
|
| 100 |
)
|
| 101 |
|
| 102 |
return transformer_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
class NicheformerForMaskedLM(NicheformerPreTrainedModel):
|
| 105 |
def __init__(self, config: NicheformerConfig):
|
|
@@ -160,6 +208,24 @@ class NicheformerForMaskedLM(NicheformerPreTrainedModel):
|
|
| 160 |
hidden_states=transformer_output,
|
| 161 |
)
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
def complete_masking(batch, masking_p, n_tokens):
|
| 165 |
"""Apply masking to input batch for masked language modeling.
|
|
|
|
| 100 |
)
|
| 101 |
|
| 102 |
return transformer_output
|
| 103 |
+
|
| 104 |
+
def get_embeddings(self, input_ids, attention_mask=None, layer: int = -1, with_context: bool = False) -> torch.Tensor:
    """Extract mean-pooled embeddings from an intermediate (or final) encoder layer.

    Args:
        input_ids: Input token IDs.
        attention_mask: Optional attention mask (1 = attend, 0 = padding).
        layer: Which transformer layer to read embeddings from; negative
            values index from the end (-1 means the last layer).
        with_context: Whether to keep the leading context tokens (the
            first 3 positions) in the pooled embeddings.

    Returns:
        torch.Tensor: Mean-pooled embedding per input sequence.
    """
    # Combine token embeddings with positional information.
    tokens = self.embeddings(input_ids)

    if self.config.learnable_pe:
        positions = self.positional_embedding(self.pos.to(tokens.device))
        hidden = self.dropout(tokens + positions)
    else:
        hidden = self.positional_embedding(tokens)

    # Resolve a negative layer index against the layer count (-1 -> last layer).
    target_layer = layer if layer >= 0 else self.config.nlayers + layer

    # The encoder expects an inverted boolean mask as src_key_padding_mask
    # (True marks padding positions to be ignored).
    key_padding = None if attention_mask is None else ~attention_mask.bool()

    # Run the encoder one layer at a time, stopping at the requested depth.
    for block in self.encoder.layers[: target_layer + 1]:
        hidden = block(
            hidden,
            src_key_padding_mask=key_padding,
            is_causal=False,
        )

    # Drop the 3 leading context tokens unless the caller asked to keep them.
    if not with_context:
        hidden = hidden[:, 3:, :]

    # Mean pooling over the sequence dimension yields one vector per sequence.
    return hidden.mean(dim=1)
|
| 151 |
|
| 152 |
class NicheformerForMaskedLM(NicheformerPreTrainedModel):
|
| 153 |
def __init__(self, config: NicheformerConfig):
|
|
|
|
| 208 |
hidden_states=transformer_output,
|
| 209 |
)
|
| 210 |
|
| 211 |
+
def get_embeddings(self, input_ids, attention_mask=None, layer: int = -1, with_context: bool = False) -> torch.Tensor:
    """Delegate embedding extraction to the wrapped base model.

    Args:
        input_ids: Input token IDs.
        attention_mask: Optional attention mask.
        layer: Which transformer layer to read embeddings from (-1 means
            the last layer).
        with_context: Whether to keep the leading context tokens.

    Returns:
        torch.Tensor: Embeddings produced by the base model.
    """
    # Thin pass-through: the base model implements the actual extraction.
    kwargs = dict(
        input_ids=input_ids,
        attention_mask=attention_mask,
        layer=layer,
        with_context=with_context,
    )
    return self.nicheformer.get_embeddings(**kwargs)
|
| 229 |
|
| 230 |
def complete_masking(batch, masking_p, n_tokens):
|
| 231 |
"""Apply masking to input batch for masked language modeling.
|