mideind
/

IceBERT-PoS

@@ -426,6 +426,25 @@ class IceBertPosForTokenClassification(PreTrainedModel):
         return self.predict_labels(batch_input_ids, batch_attention_mask, batch_word_mask)
     def predict_ifd_labels_from_text(
         self, sentences: List[List[str]], tokenizer, truncate: bool = False
     ) -> List[List[str]]:
@@ -444,17 +463,7 @@ class IceBertPosForTokenClassification(PreTrainedModel):
         """
         # Get model predictions in (category, [attributes]) format
         predictions = self.predict_labels_from_text(sentences, tokenizer, truncate)
-        # Time the IFD conversion
-        start_time = time.perf_counter()
-        ifd_predictions = []
-        for sentence_predictions in predictions:
-            ifd_labels = convert_predictions_to_ifd(sentence_predictions)  # (Ws,)
-            ifd_predictions.append(ifd_labels)
-        ifd_conversion_time = time.perf_counter() - start_time
-        logger.debug(f"IFD conversion took {ifd_conversion_time:.4f} seconds")
-        return ifd_predictions
     def _word_ids_to_word_mask(self, word_ids: List[int]) -> torch.Tensor:
         """

         return self.predict_labels(batch_input_ids, batch_attention_mask, batch_word_mask)
+    def convert_labels_to_ifd(self, predictions: List[List[Tuple[str, List[str]]]]) -> List[List[str]]:
+        """
+        Convert model predictions to IFD format labels.
+        Args:
+            predictions: List of sequences, each containing (category, [attributes]) per word
+        Returns:
+            List of IFD format labels per sentence
+        """
+        # Time the IFD conversion
+        start_time = time.perf_counter()
+        ifd_labels = []
+        for sentence_predictions in predictions:
+            ifd_labels.append(convert_predictions_to_ifd(sentence_predictions))
+        ifd_conversion_time = time.perf_counter() - start_time
+        logger.debug(f"IFD conversion took {ifd_conversion_time:.4f} seconds")
+        return ifd_labels
     def predict_ifd_labels_from_text(
         self, sentences: List[List[str]], tokenizer, truncate: bool = False
     ) -> List[List[str]]:
         """
         # Get model predictions in (category, [attributes]) format
         predictions = self.predict_labels_from_text(sentences, tokenizer, truncate)
+        return self.convert_labels_to_ifd(predictions)
     def _word_ids_to_word_mask(self, word_ids: List[int]) -> torch.Tensor:
         """