Fix issue in get_vocab
#18
by
tianxie-sf
- opened
- tokenization_xgen.py +1 -1
tokenization_xgen.py
CHANGED
|
@@ -139,7 +139,7 @@ class XgenTokenizer(PreTrainedTokenizer):
|
|
| 139 |
|
| 140 |
def get_vocab(self):
|
| 141 |
"""Returns vocab as a dict"""
|
| 142 |
-
vocab = {self.
|
| 143 |
return vocab
|
| 144 |
|
| 145 |
def _tokenize(self, text, **kwargs):
|
|
|
|
| 139 |
|
| 140 |
def get_vocab(self):
|
| 141 |
"""Returns vocab as a dict"""
|
| 142 |
+
vocab = {self.encoder.decode_single_token_bytes(i): i for i in range(self.vocab_size)}
|
| 143 |
return vocab
|
| 144 |
|
| 145 |
def _tokenize(self, text, **kwargs):
|