Update tokenization_chatglm.py
Based on the [documentation](https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.PreTrainedTokenizer.decode) and the reference implementation of the Hugging Face tokenizer, the `decode` method should accept both a single integer and an empty list as input.
This simple modification would make ChatGLM-6B compatible with inference frameworks such as [Basaran](https://github.com/hyperonym/basaran).
- tokenization_chatglm.py +4 -0
tokenization_chatglm.py
CHANGED
```diff
@@ -264,6 +264,10 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         spaces_between_special_tokens: bool = True,
         **kwargs
     ) -> str:
+        if not isinstance(token_ids, list):
+            token_ids = [token_ids]
+        if len(token_ids) == 0:
+            return ""
         if isinstance(token_ids[0], list):
             tokens = []
             for single_token_ids in token_ids:
```