mohsennp committed on
Commit
91c13df
·
verified ·
1 Parent(s): d717ad8

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenization_encodon.py +1 -0
tokenization_encodon.py CHANGED
@@ -43,6 +43,7 @@ class EnCodonTokenizer(PreTrainedTokenizer):
43
  self.codons = self.get_all_codons(seq_type=seq_type)
44
  self.seq_type = seq_type
45
  self.special_tokens = [cls_token, sep_token, unk_token, pad_token, mask_token]
 
46
 
47
  self.encoder = {k: i for i, k in enumerate(self.special_tokens + self.codons)}
48
  self.decoder = {i: k for k, i in self.encoder.items()}
 
43
  self.codons = self.get_all_codons(seq_type=seq_type)
44
  self.seq_type = seq_type
45
  self.special_tokens = [cls_token, sep_token, unk_token, pad_token, mask_token]
46
+ self.special_tokens = [str(token) for token in self.special_tokens]
47
 
48
  self.encoder = {k: i for i, k in enumerate(self.special_tokens + self.codons)}
49
  self.decoder = {i: k for k, i in self.encoder.items()}