BathSalt-1 committed on
Commit
fe04db1
·
verified ·
1 Parent(s): 8b41d42

Create tokenizer.py

Browse files
Files changed (1) hide show
  1. tokenizer.py +13 -0
tokenizer.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoTokenizer
3
+
4
class DaedalusTokenizer(AutoTokenizer):
    """Fixed-length encode/decode helper built around a backend tokenizer.

    ``AutoTokenizer`` is only a factory: it cannot be instantiated and has no
    ``encode_plus``/``decode`` instance methods of its own.  This class
    therefore keeps a concrete tokenizer in ``self.tokenizer`` and forwards
    the actual work to it.  The ``AutoTokenizer`` base class is kept only so
    class-level helpers such as ``DaedalusTokenizer.from_pretrained`` remain
    available to existing callers.
    """

    def __init__(self, config, tokenizer=None):
        """Store ``config`` and resolve the backend tokenizer.

        Args:
            config: Object exposing ``max_seq_length`` (read by ``encode``).
            tokenizer: Optional ready-made tokenizer to delegate to.  When
                omitted, one is loaded with ``AutoTokenizer.from_pretrained``
                using ``config.name_or_path``.

        Raises:
            ValueError: If no tokenizer is given and ``config`` carries no
                usable ``name_or_path``.
        """
        # Deliberately no super().__init__(): AutoTokenizer.__init__ takes no
        # arguments and always raises, so calling it made this class
        # impossible to construct.
        self.config = config
        if tokenizer is None:
            # NOTE(review): assumes the checkpoint id lives in
            # ``config.name_or_path`` -- confirm against the project's config.
            name_or_path = getattr(config, "name_or_path", None)
            if not name_or_path:
                raise ValueError(
                    "DaedalusTokenizer needs either an explicit `tokenizer` "
                    "or a config with a non-empty `name_or_path`."
                )
            tokenizer = AutoTokenizer.from_pretrained(name_or_path)
        self.tokenizer = tokenizer

    def encode(self, text):
        """Tokenize ``text``, padded/truncated to ``config.max_seq_length``.

        Returns whatever the backend tokenizer's ``__call__`` returns for a
        single text (the same encoding ``encode_plus`` would produce).
        """
        return self.tokenizer(
            text,
            max_length=self.config.max_seq_length,
            padding='max_length',
            truncation=True,
        )

    def decode(self, ids):
        """Convert token ``ids`` back to text, dropping special tokens."""
        # Delegate to the backend; calling self.decode here recursed forever.
        return self.tokenizer.decode(ids, skip_special_tokens=True)