Update tokenization_chatglm.py
Files changed: tokenization_chatglm.py (+18 -0)
tokenization_chatglm.py
CHANGED
|
@@ -103,6 +103,24 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
|
|
| 103 |
def eos_token_id(self):
|
| 104 |
return self.get_command("<eos>")
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
@property
def vocab_size(self):
    """Return ``n_words`` of the wrapped ``self.tokenizer`` as the vocabulary size."""
    backend = self.tokenizer
    return backend.n_words
|
|
|
|
| 103 |
def eos_token_id(self):
|
| 104 |
return self.get_command("<eos>")
|
| 105 |
|
| 106 |
+
@eos_token.setter
def eos_token(self, value):
    """Store ``value`` as the EOS token on ``self._eos_token``.

    Args:
        value: a ``str``, an ``AddedToken``, or ``None``.

    Raises:
        ValueError: if ``value`` is anything other than the types above.
    """
    # The type check is evaluated first, exactly as in the guard it replaces.
    is_token_like = isinstance(value, (str, AddedToken))
    if is_token_like or value is None:
        self._eos_token = value
        return
    raise ValueError("Cannot set a non-string value as the EOS token")
|
| 111 |
+
|
| 112 |
+
@unk_token.setter
def unk_token(self, value):
    """Store ``value`` as the UNK token on ``self._unk_token``.

    Args:
        value: a ``str``, an ``AddedToken``, or ``None``.

    Raises:
        ValueError: if ``value`` is anything other than the types above.
    """
    # The type check is evaluated first, exactly as in the guard it replaces.
    is_token_like = isinstance(value, (str, AddedToken))
    if is_token_like or value is None:
        self._unk_token = value
        return
    raise ValueError("Cannot set a non-string value as the UNK token")
|
| 117 |
+
|
| 118 |
+
@pad_token.setter
def pad_token(self, value):
    """Store ``value`` as the PAD token on ``self._pad_token``.

    Args:
        value: a ``str``, an ``AddedToken``, or ``None``.

    Raises:
        ValueError: if ``value`` is anything other than the types above.
    """
    # The type check is evaluated first, exactly as in the guard it replaces.
    is_token_like = isinstance(value, (str, AddedToken))
    if is_token_like or value is None:
        self._pad_token = value
        return
    raise ValueError("Cannot set a non-string value as the PAD token")
|
| 123 |
+
|
| 124 |
@property
def vocab_size(self):
    """Return ``n_words`` of the wrapped ``self.tokenizer`` as the vocabulary size."""
    backend = self.tokenizer
    return backend.n_words
|