Taykhoom
/

RNA-MSM

@@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Union
 import torch
 from transformers import PreTrainedTokenizer
 _VOCAB = {
@@ -154,9 +155,9 @@ class RNAMSMTokenizer(PreTrainedTokenizer):
         if return_tensors == "pt":
             input_ids = torch.tensor(input_ids, dtype=torch.long)
             attention_mask = torch.tensor(attention_mask, dtype=torch.long)
-            return {"input_ids": input_ids, "attention_mask": attention_mask}
-        return {"input_ids": input_ids, "attention_mask": attention_mask}
     def _tokenize_single(self, sequence, add_special_tokens=True):
         tokens = list(sequence)
@@ -223,9 +224,9 @@ class RNAMSMTokenizer(PreTrainedTokenizer):
         if return_tensors == "pt":
             batch_ids = torch.tensor(batch_ids, dtype=torch.long)
             batch_mask = torch.tensor(batch_mask, dtype=torch.long)
-            return {"input_ids": batch_ids, "attention_mask": batch_mask}
-        return {"input_ids": batch_ids, "attention_mask": batch_mask}
     def decode(self, token_ids, skip_special_tokens=False, **kwargs):
         if isinstance(token_ids, torch.Tensor):

 import torch
 from transformers import PreTrainedTokenizer
+from transformers.tokenization_utils_base import BatchEncoding
 _VOCAB = {
         if return_tensors == "pt":
             input_ids = torch.tensor(input_ids, dtype=torch.long)
             attention_mask = torch.tensor(attention_mask, dtype=torch.long)
+            return BatchEncoding({"input_ids": input_ids, "attention_mask": attention_mask})
+        return BatchEncoding({"input_ids": input_ids, "attention_mask": attention_mask})
     def _tokenize_single(self, sequence, add_special_tokens=True):
         tokens = list(sequence)
         if return_tensors == "pt":
             batch_ids = torch.tensor(batch_ids, dtype=torch.long)
             batch_mask = torch.tensor(batch_mask, dtype=torch.long)
+            return BatchEncoding({"input_ids": batch_ids, "attention_mask": batch_mask})
+        return BatchEncoding({"input_ids": batch_ids, "attention_mask": batch_mask})
     def decode(self, token_ids, skip_special_tokens=False, **kwargs):
         if isinstance(token_ids, torch.Tensor):