Fix bug in tokenizer `decode` method: return `self.sp_tokenizer.decode(token_ids)` after removing pad tokens
#84
by
njunlp
- opened
- tokenization_chatglm.py +1 -1
tokenization_chatglm.py
CHANGED
|
@@ -282,7 +282,7 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 282 |
return ""
|
| 283 |
if self.pad_token_id in token_ids: # remove pad
|
| 284 |
token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
|
| 285 |
-
return
|
| 286 |
|
| 287 |
def _convert_token_to_id(self, token):
|
| 288 |
""" Converts a token (str) in an id using the vocab. """
|
|
|
|
| 282 |
return ""
|
| 283 |
if self.pad_token_id in token_ids: # remove pad
|
| 284 |
token_ids = list(filter((self.pad_token_id).__ne__, token_ids))
|
| 285 |
+
return self.sp_tokenizer.decode(token_ids)
|
| 286 |
|
| 287 |
def _convert_token_to_id(self, token):
|
| 288 |
""" Converts a token (str) in an id using the vocab. """
|