Commit
·
087b7ec
1
Parent(s):
a20d733
update tokenizer.py
Browse files
- tokenizer.py +4 -4
tokenizer.py
CHANGED
|
@@ -112,10 +112,10 @@ class PhobertTokenizer(PreTrainedTokenizer):
|
|
| 112 |
self.merges_file = merges_file
|
| 113 |
|
| 114 |
self.encoder = {}
|
| 115 |
-
self.
|
| 116 |
-
self.
|
| 117 |
-
self.
|
| 118 |
-
self.
|
| 119 |
|
| 120 |
self.add_from_file(vocab_file)
|
| 121 |
self.encoder[self.mask_token] = len(self.encoder)
|
|
|
|
| 112 |
self.merges_file = merges_file
|
| 113 |
|
| 114 |
self.encoder = {}
|
| 115 |
+
self.encoder[self.bos_token] = 0
|
| 116 |
+
self.encoder[self.pad_token] = 1
|
| 117 |
+
self.encoder[self.eos_token] = 2
|
| 118 |
+
self.encoder[self.unk_token] = 3
|
| 119 |
|
| 120 |
self.add_from_file(vocab_file)
|
| 121 |
self.encoder[self.mask_token] = len(self.encoder)
|