RangiLyu committed on
Commit
6809eb5
·
verified ·
1 Parent(s): d08f8d2

fix out-of-vocab token

Browse files
Files changed (1) hide show
  1. tokenization_interns1.py +4 -0
tokenization_interns1.py CHANGED
@@ -891,6 +891,10 @@ class InternS1Tokenizer(Qwen2Tokenizer):
891
  else:
892
  return self.encoder.get(token, self.encoder.get(self._unk_token))
893
 
 
 
 
 
894
  def convert_tokens_to_string(self, tokens):
895
  """Converts a sequence of tokens (string) in a single string."""
896
  text = "".join(tokens)
 
891
  else:
892
  return self.encoder.get(token, self.encoder.get(self._unk_token))
893
 
894
+ def _convert_id_to_token(self, index):
895
+ """Converts an index (integer) in a token (str) using the vocab."""
896
+ return self.decoder.get(index, "")
897
+
898
  def convert_tokens_to_string(self, tokens):
899
  """Converts a sequence of tokens (string) in a single string."""
900
  text = "".join(tokens)