Upload tokenizer
Browse files — tokenizer_script.py (+7, −5)
tokenizer_script.py
CHANGED
|
@@ -113,14 +113,16 @@ class CharacterTokenizer(PreTrainedTokenizer):
|
|
| 113 |
|
| 114 |
@classmethod
|
| 115 |
def from_json(cls, vocab_file, **kwargs):
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
| 120 |
|
| 121 |
@classmethod
|
| 122 |
def from_vocab(cls, vocab, **kwargs):
|
| 123 |
-
|
|
|
|
| 124 |
|
| 125 |
@classmethod
|
| 126 |
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|
|
|
|
| 113 |
|
| 114 |
@classmethod
def from_json(cls, vocab_file, **kwargs):
    """Build a tokenizer from a vocabulary stored as a JSON file.

    Args:
        vocab_file: Path to a UTF-8 encoded JSON file containing the
            vocabulary mapping.
        **kwargs: Extra keyword arguments forwarded to the constructor.

    Returns:
        A new instance of ``cls`` initialized with the loaded vocab.

    Raises:
        OSError: If ``vocab_file`` cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # NOTE(review): dropped the leftover debug print of the file path;
    # library code should not write to stdout.
    with open(vocab_file, encoding='utf-8') as f:
        vocab = json.load(f)
    return cls(vocab=vocab, **kwargs)
|
| 121 |
|
| 122 |
@classmethod
def from_vocab(cls, vocab, **kwargs):
    """Build a tokenizer directly from an in-memory vocabulary mapping.

    Args:
        vocab: The vocabulary mapping to initialize the tokenizer with.
        **kwargs: Extra keyword arguments forwarded to the constructor.

    Returns:
        A new instance of ``cls`` initialized with ``vocab``.
    """
    # NOTE(review): dropped the leftover debug print of the whole vocab;
    # it could dump thousands of entries to stdout.
    return cls(vocab=vocab, **kwargs)
|
| 126 |
|
| 127 |
@classmethod
|
| 128 |
def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
|