sd
Browse files- tokenizeConfig.py +2 -2
tokenizeConfig.py
CHANGED
|
@@ -90,7 +90,7 @@ class OBITokenizer(PreTrainedTokenizer):
|
|
| 90 |
def _tokenize(self, text):
    """Return the token ids for *text*.

    Encodes via the wrapped `tokenizers` tokenizer and returns the
    `ids` attribute of the resulting `Encoding` object.

    Bug fixed: the original line was truncated at `return encoding.`
    (a syntax error); completed to `return encoding.ids`, matching the
    corrected version of this method elsewhere in the diff.

    NOTE(review): Hugging Face's `_tokenize` conventionally returns
    token *strings*, not ids — this class appears to deviate on
    purpose; confirm against `_convert_token_to_id`.
    """
    encoding = self.tokenizer.encode(text)
    return encoding.ids
|
| 94 |
|
| 95 |
def _convert_token_to_id(self, token):
|
| 96 |
"""Converts a token (str) in an id using the vocab."""
|
|
@@ -102,7 +102,7 @@ class OBITokenizer(PreTrainedTokenizer):
|
|
| 102 |
|
| 103 |
def convert_tokens_to_string(self, tokens):
    """Join a sequence of tokens (strings) back into a single string.

    Delegates entirely to the underlying tokenizer's decoder.
    """
    decoded_text = self.tokenizer.decode(tokens)
    return decoded_text
|
| 106 |
|
| 107 |
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
|
| 108 |
"""
|
|
|
|
| 90 |
def _tokenize(self, text):
    """Encode *text* with the wrapped tokenizer and return its id list.

    NOTE(review): returns `Encoding.ids` (integers) rather than token
    strings, deviating from the usual `_tokenize` convention — confirm
    this is intentional for this class.
    """
    result = self.tokenizer.encode(text)
    ids = result.ids
    return ids
|
| 94 |
|
| 95 |
def _convert_token_to_id(self, token):
|
| 96 |
"""Converts a token (str) in an id using the vocab."""
|
|
|
|
| 102 |
|
| 103 |
def convert_tokens_to_string(self, tokens):
    """Converts a sequence of tokens (string) into a single string.

    Thin wrapper over the backing tokenizer's `decode`.
    """
    return self.tokenizer.decode(tokens)
|
| 106 |
|
| 107 |
def save_vocabulary(self, save_directory, filename_prefix: Optional[str] = None) -> Tuple[str]:
|
| 108 |
"""
|