Upload tokenizer
Browse files
- tokenization_encodon.py +1 -0
tokenization_encodon.py
CHANGED
|
@@ -43,6 +43,7 @@ class EnCodonTokenizer(PreTrainedTokenizer):
|
|
| 43 |
self.codons = self.get_all_codons(seq_type=seq_type)
|
| 44 |
self.seq_type = seq_type
|
| 45 |
self.special_tokens = [cls_token, sep_token, unk_token, pad_token, mask_token]
|
|
|
|
| 46 |
|
| 47 |
self.encoder = {k: i for i, k in enumerate(self.special_tokens + self.codons)}
|
| 48 |
self.decoder = {i: k for k, i in self.encoder.items()}
|
|
|
|
| 43 |
self.codons = self.get_all_codons(seq_type=seq_type)
|
| 44 |
self.seq_type = seq_type
|
| 45 |
self.special_tokens = [cls_token, sep_token, unk_token, pad_token, mask_token]
|
| 46 |
+
self.special_tokens = [str(token) for token in self.special_tokens]
|
| 47 |
|
| 48 |
self.encoder = {k: i for i, k in enumerate(self.special_tokens + self.codons)}
|
| 49 |
self.decoder = {i: k for k, i in self.encoder.items()}
|