Fix `_convert_token_to_id`: return `unk_token_id` when encoding a token yields no IDs
Browse files- sentencepiece_ja.py +3 -0
sentencepiece_ja.py
CHANGED
|
@@ -41,6 +41,9 @@ class SentencePieceJA(PreTrainedTokenizer):
|
|
| 41 |
return self._tokenizer.encode(text).tokens
|
| 42 |
|
| 43 |
def _convert_token_to_id(self, token):
|
|
|
|
|
|
|
|
|
|
| 44 |
return self._tokenizer.encode(token).ids[0]
|
| 45 |
|
| 46 |
def _convert_id_to_token(self, index: int) -> str:
|
|
|
|
| 41 |
return self._tokenizer.encode(text).tokens
|
| 42 |
|
| 43 |
def _convert_token_to_id(self, token):
    """Return the vocabulary id for *token*.

    The token is encoded once with ``self._tokenizer``; the first id of
    the resulting encoding is returned.

    Args:
        token: The token string to look up.

    Returns:
        The first id produced by encoding *token*, or
        ``self.unk_token_id`` when the encoding contains no ids.
    """
    ids = self._tokenizer.encode(token).ids
    # An empty encoding has no first element; fall back to the unknown id
    # instead of raising IndexError.
    if not ids:
        return self.unk_token_id
    # Reuse the ids computed above rather than encoding the token again.
    return ids[0]
|
| 48 |
|
| 49 |
def _convert_id_to_token(self, index: int) -> str:
|