alverciito committed
Commit 34f99b8 · Parent(s): 8068e2f

fix huggingface model missmatch

- model.py +43 -50
- src/model/segmentation.py +15 -1
model.py
CHANGED

@@ -168,27 +168,20 @@ class SentenceCoseNet(PreTrainedModel):
                 Contextualized token embeddings with shape
                 `(batch_size, sequence_length, emb_dim)`.
         """
+        # Set the model task:
+        self.model.task = 'token_encoding'
         # Convert to type:
-
-
-
-
-
-
-
-
-
-
-
-            mask = mask.reshape(_b * _s, _t).bool()
-
-        # Encode the sequence:
-        for encoder in self.model.encoder_blocks:
-            x = encoder(x, mask=mask)
-
-        # Reshape x and mask:
-        x = x.reshape(_b, _s, _t, _d)
-        return x.squeeze(1)
+        if len(input_ids.shape) == 2:
+            x = input_ids.int().unsqueeze(1)
+            mask = attention_mask.unsqueeze(1) if attention_mask is not None else None
+            output = self.model(x=x, mask=mask).squeeze(1)
+        elif len(input_ids.shape) == 3:
+            x = input_ids.int()
+            mask = attention_mask if attention_mask is not None else None
+            output = self.model(x=x, mask=mask)
+        else:
+            raise ValueError("Input tensor must be of shape (Batch, Tokens) or (Batch, Sentences, Tokens).")
+        return output
 
     def get_sentence_embedding(
         self,
@@ -212,37 +205,14 @@ class SentenceCoseNet(PreTrainedModel):
             torch.Tensor:
                 Sentence embeddings of shape (B, D)
         """
-        #
-
-
-
-        # Embedding and positional encoding:
-        x = self.model.embedding(x)
-        x = self.model.positional_encoding(x)
+        # Set the model task:
+        self.model.task = 'sentence_encoding'
+        output = self.call(input_ids, attention_mask)
 
-        # Reshape x and mask:
-        _b, _s, _t, _d = x.shape
-        x = x.reshape(_b * _s, _t, _d)
-        if mask is not None:
-            mask = mask.reshape(_b * _s, _t).bool()
-
-        # Encode the sequence:
-        for encoder in self.model.encoder_blocks:
-            x = encoder(x, mask=mask)
-
-        # Reshape x and mask:
-        x = x.reshape(_b, _s, _t, _d)
-        if mask is not None:
-            mask = mask.reshape(_b, _s, _t)
-            mask = torch.logical_not(mask) if not self.model.valid_padding else mask
-
-        # Apply pooling:
-        x, mask = self.model.pooling(x, mask=mask)
-
-        # Apply normalization if required:
         if normalize:
-
-
+            output = torch.nn.functional.normalize(output, p=2, dim=-1)
+
+        return output
 
     def similarity(self, embeddings_1: torch.Tensor, embeddings_2: torch.Tensor) -> torch.Tensor:
         """
@@ -268,7 +238,6 @@ class SentenceCoseNet(PreTrainedModel):
         # Return cosine similarities (B, S):
         return embeddings[..., 0, 1]
 
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -296,6 +265,7 @@ class SentenceCoseNet(PreTrainedModel):
         Returns:
            Model-specific output as produced by `SegmentationNetwork`.
         """
+        self.model.task = 'segmentation'
         return self.model(
             x=input_ids,
             mask=attention_mask,
@@ -303,6 +273,29 @@ class SentenceCoseNet(PreTrainedModel):
             **kwargs,
         )
 
+    def call(self, input_ids: torch.Tensor, attention_mask=None) -> torch.Tensor:
+        """
+        Internal method to handle different input shapes (task already selected).
+        Args:
+            input_ids:
+                Tensor of token IDs with shape
+                `(batch_size, sequence_length)`.
+            attention_mask:
+                Optional attention mask tensor.
+        """
+        # Convert to type:
+        if len(input_ids.shape) == 2:
+            x = input_ids.int().unsqueeze(1)
+            mask = attention_mask.unsqueeze(1) if attention_mask is not None else None
+            output = self.model(x=x, mask=mask).squeeze(1)
+        elif len(input_ids.shape) == 3:
+            x = input_ids.int()
+            mask = attention_mask if attention_mask is not None else None
+            output = self.model(x=x, mask=mask)
+        else:
+            raise ValueError("Input tensor must be of shape (Batch, Tokens) or (Batch, Sentences, Tokens).")
+        return output
+
     @staticmethod
     def to_model_config(config: SentenceCoseNetConfig) -> ModelConfig:
         """
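After this change, every public entry point on SentenceCoseNet selects a task on the wrapped network and funnels shape handling through `call`, which accepts token IDs of shape `(Batch, Tokens)` or `(Batch, Sentences, Tokens)`. A minimal usage sketch under stated assumptions: the import path, the checkpoint id, the tensor sizes, and the positional parameter order are illustrative, inferred from the visible fragments rather than taken from this commit.

import torch
from model import SentenceCoseNet  # assumed import path; the class lives in model.py

# Hypothetical checkpoint id; the commit does not name one.
model = SentenceCoseNet.from_pretrained("user/sentence-cosenet")

B, S, T = 2, 4, 32
input_ids = torch.randint(0, 1000, (B, S, T))            # token IDs, (Batch, Sentences, Tokens)
attention_mask = torch.ones(B, S, T, dtype=torch.long)

# Sentence embeddings; normalize=True L2-normalizes along the last dimension.
emb = model.get_sentence_embedding(input_ids, attention_mask, normalize=True)

# Cosine similarities between two batches of embeddings (signature as in the diff):
sims = model.similarity(emb, emb)

# A 2-D input (Batch, Tokens) also works: call() unsqueezes it to
# (Batch, 1, Tokens) and squeezes the sentence axis back out of the output.
single = model.get_sentence_embedding(input_ids[:, 0, :], attention_mask[:, 0, :])

# forward() pins the task back to 'segmentation' and delegates to the wrapped network:
seg = model(input_ids=input_ids, attention_mask=attention_mask)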
src/model/segmentation.py
CHANGED

@@ -24,7 +24,7 @@ class SegmentationNetwork(torch.nn.Module):
     The final output is a pair-wise distance matrix suitable for
     segmentation or boundary detection tasks.
     """
-    def __init__(self, model_config: ModelConfig, **kwargs):
+    def __init__(self, model_config: ModelConfig, task='segmentation', **kwargs):
         """
         Initialize the segmentation network.
 
@@ -73,6 +73,11 @@ class SegmentationNetwork(torch.nn.Module):
             module_list.append(encoder_block)
 
         self.encoder_blocks = torch.nn.ModuleList(module_list)
+        self.task = task
+        if self.task not in ['segmentation', 'similarity', 'token_encoding', 'sentence_encoding']:
+            raise ValueError(f"Invalid task '{self.task}'. Supported tasks are 'segmentation', 'similarity', "
+                             f"'token_encoding', and 'sentence_encoding'.")
+
 
     def forward(self, x: torch.Tensor, mask: torch.Tensor = None, candidate_mask: torch.Tensor = None) -> torch.Tensor:
         """
@@ -126,12 +131,21 @@ class SegmentationNetwork(torch.nn.Module):
             mask = mask.reshape(_b, _s, _t)
             mask = torch.logical_not(mask) if not self.valid_padding else mask
 
+        if self.task == 'token_encoding':
+            return x
+
         # Apply pooling:
         x, mask = self.pooling(x, mask=mask)
 
+        if self.task == 'sentence_encoding':
+            return x
+
         # Compute distances:
         x = self.distance_layer(x)
 
+        if self.task == 'similarity':
+            return x
+
         # Pass through CoSeNet:
         x = self.cosenet(x, mask=mask)
 
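The segmentation.py side of the fix turns `forward` into a staged pipeline with early exits: the encoder output is returned directly for 'token_encoding', the pooled representation for 'sentence_encoding', the distance matrix for 'similarity', and only 'segmentation' runs the full CoSeNet head. A minimal self-contained sketch of this early-return-by-task pattern; the stage modules below are stand-ins, not the repository's actual layers.

import torch

class StagedNet(torch.nn.Module):
    """Sketch of the task-gated forward used by SegmentationNetwork.

    Each stage feeds the next; self.task decides how deep the pipeline runs.
    """

    TASKS = ('segmentation', 'similarity', 'token_encoding', 'sentence_encoding')

    def __init__(self, dim: int = 16, task: str = 'segmentation'):
        super().__init__()
        if task not in self.TASKS:
            raise ValueError(f"Invalid task '{task}'. Supported tasks: {self.TASKS}.")
        self.task = task
        self.encoder = torch.nn.Linear(dim, dim)  # stand-in for the encoder blocks
        self.head = torch.nn.Linear(1, 1)         # stand-in for the CoSeNet head

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        x = self.encoder(x)                        # (B, S, T, D) contextual token features
        if self.task == 'token_encoding':
            return x                               # early exit: token embeddings

        x = x.mean(dim=-2)                         # stand-in pooling over tokens -> (B, S, D)
        if self.task == 'sentence_encoding':
            return x                               # early exit: sentence embeddings

        x = torch.cdist(x, x)                      # stand-in pairwise distances -> (B, S, S)
        if self.task == 'similarity':
            return x                               # early exit: distance matrix

        # Full path: score each pairwise entry (stand-in for the CoSeNet pass).
        return self.head(x.unsqueeze(-1)).squeeze(-1)

net = StagedNet(task='sentence_encoding')
out = net(torch.randn(2, 4, 32, 16))               # -> torch.Size([2, 4, 16])

Because the task is mutable instance state set by the caller (as in the commit's `self.model.task = ...`), successive calls with different tasks reuse one set of weights; the trade-off is that the type of `forward`'s output depends on that state.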