Upload tokenization_codegen25.py
Browse files
tokenization_codegen25.py
CHANGED
|
@@ -166,7 +166,7 @@ class CodeGen25Tokenizer(PreTrainedTokenizer):
 
     def _convert_id_to_token(self, index):
         """Converts an index (integer) in a token (str) using the vocab."""
-        return self.encoder.decode_single_token_bytes(index).decode("utf-8")
+        return self.encoder.decode_single_token_bytes(index).decode("utf-8", errors="ignore")
 
     def _decode(self, token_ids: List[int], skip_special_tokens: bool = False, **kwargs):
         if skip_special_tokens: