Update tokenizer.py
Browse files
- tokenizer.py +7 -0
tokenizer.py
CHANGED
|
@@ -29,6 +29,10 @@ class ChessTokenizer(PreTrainedTokenizer):
|
|
| 29 |
BOS_TOKEN = "[BOS]" # Beginning of Sequence (Start of Game)
|
| 30 |
EOS_TOKEN = "[EOS]" # End of Sequence (End of Game)
|
| 31 |
UNK_TOKEN = "[UNK]"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
def __init__(
|
| 34 |
self,
|
|
@@ -47,6 +51,9 @@ class ChessTokenizer(PreTrainedTokenizer):
|
|
| 47 |
kwargs.pop("eos_token", None)
|
| 48 |
kwargs.pop("unk_token", None)
|
| 49 |
|
|
|
|
|
|
|
|
|
|
| 50 |
if vocab is not None:
|
| 51 |
self._vocab = vocab
|
| 52 |
elif vocab_file is not None and os.path.exists(vocab_file):
|
|
|
|
| 29 |
BOS_TOKEN = "[BOS]" # Beginning of Sequence (Start of Game)
|
| 30 |
EOS_TOKEN = "[EOS]" # End of Sequence (End of Game)
|
| 31 |
UNK_TOKEN = "[UNK]"
|
| 32 |
+
|
| 33 |
+
vocab_files_names = {
|
| 34 |
+
"vocab_file": "vocab.json"
|
| 35 |
+
}
|
| 36 |
|
| 37 |
def __init__(
|
| 38 |
self,
|
|
|
|
| 51 |
kwargs.pop("eos_token", None)
|
| 52 |
kwargs.pop("unk_token", None)
|
| 53 |
|
| 54 |
+
|
| 55 |
+
self.vocab_file = vocab_file
|
| 56 |
+
|
| 57 |
if vocab is not None:
|
| 58 |
self._vocab = vocab
|
| 59 |
elif vocab_file is not None and os.path.exists(vocab_file):
|