File size: 440 Bytes
fe04db1
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
import torch
from transformers import AutoTokenizer

class DaedalusTokenizer(AutoTokenizer):
    """Tokenizer that encodes to a config-defined fixed length.

    ``encode`` pads/truncates to ``config.max_seq_length`` and ``decode``
    drops special tokens.

    NOTE(review): per the transformers documentation, ``AutoTokenizer`` is a
    factory intended to be used through ``AutoTokenizer.from_pretrained(...)``
    and is not meant to be instantiated directly, so the ``super()`` calls
    below presumably need a concrete tokenizer base class (one that provides
    ``encode_plus`` and ``decode``) -- TODO confirm the intended base.
    """

    def __init__(self, config):
        """Initialise the base class with ``config`` and keep a reference.

        Args:
            config: Object exposing ``max_seq_length``; it is read by
                ``encode`` on every call.
        """
        super().__init__(config)
        self.config = config

    def encode(self, text):
        """Encode ``text`` to exactly ``config.max_seq_length`` tokens.

        Args:
            text: Input passed straight through to ``encode_plus``.

        Returns:
            Whatever ``self.encode_plus`` returns for the given arguments
            (padding to ``'max_length'`` with truncation enabled).
        """
        return self.encode_plus(
            text,
            max_length=self.config.max_seq_length,
            padding='max_length',
            truncation=True,
        )

    def decode(self, ids):
        """Decode ``ids`` back to text with special tokens skipped.

        Args:
            ids: Token ids passed through to the parent ``decode``.

        Returns:
            The parent class's ``decode`` result with
            ``skip_special_tokens=True``.
        """
        # Must go through super(): calling self.decode here would re-enter
        # this override and recurse until RecursionError.
        return super().decode(ids, skip_special_tokens=True)