Commit 6deb831
1 parent: ca874c3
move to pregenerated tokens - some bug with word ids -- move to the initial ones
generic_ner.py CHANGED (+9 -6)
@@ -262,10 +262,11 @@ def get_entities(tokens, tags, confidences, text):
 
 
 def realign(
-    tokens, out_label_preds, softmax_scores, tokenizer, reverted_label_map
+    word_ids, tokens, out_label_preds, softmax_scores, tokenizer, reverted_label_map
 ):
     preds_list, words_list, confidence_list = [], [], []
-    word_ids = tokenizer(tokens, is_split_into_words=True).word_ids()
+    # word_ids = tokenizer(tokens, is_split_into_words=True).word_ids()
+
     for idx, word in enumerate(tokens):
         beginning_index = word_ids.index(idx)
         try:
@@ -701,11 +702,12 @@ class MultitaskTokenClassificationPipeline(Pipeline):
             truncation=True,
             max_length=512,
         )
+        word_ids = tokenized_inputs.word_ids()
 
-        return tokenized_inputs, text, tokens
+        return tokenized_inputs, word_ids, text, tokens
 
     def _forward(self, inputs):
-        inputs, text, tokens = inputs
+        inputs, word_ids, text, tokens = inputs
 
         input_ids = torch.tensor([inputs["input_ids"]], dtype=torch.long).to(
             self.model.device
@@ -715,7 +717,7 @@ class MultitaskTokenClassificationPipeline(Pipeline):
         )
         with torch.no_grad():
            outputs = self.model(input_ids, attention_mask)
-        return outputs, text, tokens
+        return outputs, word_ids, text, tokens
 
     def is_within(self, entity1, entity2):
         """Check if entity1 is fully within the bounds of entity2."""
@@ -731,7 +733,7 @@ class MultitaskTokenClassificationPipeline(Pipeline):
         :param kwargs:
         :return:
         """
-        tokens_result, text, tokens = outputs
+        tokens_result, word_ids, text, tokens = outputs
 
         predictions = {}
         confidence_scores = {}
@@ -742,6 +744,7 @@ class MultitaskTokenClassificationPipeline(Pipeline):
         entities = {}
         for task in predictions.keys():
             words_list, preds_list, confidence_list = realign(
+                word_ids,
                 tokens,
                 predictions[task],
                 confidence_scores[task],
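Why pregenerated word ids matter here: a minimal sketch, assuming a Hugging Face fast tokenizer. The model name, the sample words, and max_length=6 below are illustrative stand-ins, not values from this repository (the pipeline itself truncates at 512). preprocess() encodes the input once, with truncation, and that encoding becomes the model's input_ids; the old realign() re-tokenized the word list from scratch without those settings, so on long inputs word_ids.index(idx) could address subtoken positions the model never saw.

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
tokens = ["Paris", "is", "the", "capital", "of", "France", "."]

# What preprocess() keeps now: word ids taken from the same truncated
# encoding that is turned into input_ids (max_length=6 exaggerates the
# 512-token cutoff so the mismatch shows on a short sentence).
encoded = tokenizer(tokens, is_split_into_words=True, truncation=True, max_length=6)
word_ids = encoded.word_ids()  # e.g. [None, 0, 1, 2, 3, None]

# What the old realign() recomputed: a fresh, untruncated encoding.
word_ids_again = tokenizer(tokens, is_split_into_words=True).word_ids()
# e.g. [None, 0, 1, 2, 3, 4, 5, 6, None] -- longer, so .index(idx) can
# point at positions that were cut off before reaching the model.

print(len(word_ids), len(word_ids_again))

Threading word_ids from preprocess() through _forward() to postprocess(), as the diff does, keeps realign() indexing the exact encoding the predictions were computed from.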