huuminh365 committed on
Commit
087b7ec
·
1 Parent(s): a20d733

update tokenizer.py: fix `self.encode` -> `self.encoder` typo in the special-token id assignments

Browse files
Files changed (1) hide show
  1. tokenizer.py +4 -4
tokenizer.py CHANGED
@@ -112,10 +112,10 @@ class PhobertTokenizer(PreTrainedTokenizer):
112
  self.merges_file = merges_file
113
 
114
  self.encoder = {}
115
- self.encode[self.bos_token] = 0
116
- self.encode[self.pad_token] = 1
117
- self.encode[self.eos_token] = 2
118
- self.encode[self.unk_token] = 3
119
 
120
  self.add_from_file(vocab_file)
121
  self.encoder[self.mask_token] = len(self.encoder)
 
112
  self.merges_file = merges_file
113
 
114
  self.encoder = {}
115
+ self.encoder[self.bos_token] = 0
116
+ self.encoder[self.pad_token] = 1
117
+ self.encoder[self.eos_token] = 2
118
+ self.encoder[self.unk_token] = 3
119
 
120
  self.add_from_file(vocab_file)
121
  self.encoder[self.mask_token] = len(self.encoder)