Spaces:
Sleeping
Sleeping
| import torch | |
class CharacterLevelTokenizer:
    """Map between the individual symbols of a corpus and integer ids.

    The vocabulary is the sorted set of distinct elements found in ``data``
    (the characters, when ``data`` is a string), so ids are deterministic
    for a given corpus.

    Attributes:
        data: The corpus passed to the constructor, stored unchanged.
        vocab: Sorted list of the distinct elements of ``data``.
        VOCAB_SIZE: Number of entries in ``vocab``.
        i_s: Dict mapping id -> symbol.
        s_i: Dict mapping symbol -> id.
    """

    def __init__(self, data):
        """Build the vocabulary and both lookup tables from ``data``.

        Args:
            data: Iterable of hashable, mutually orderable items;
                presumably a ``str`` of training text -- confirm with callers.
        """
        self.data = data
        # sorted() accepts any iterable, so no intermediate list is needed.
        self.vocab = sorted(set(self.data))
        self.VOCAB_SIZE = len(self.vocab)
        self.i_s = dict(enumerate(self.vocab))
        self.s_i = {symbol: index for index, symbol in enumerate(self.vocab)}

    def encode(self, s):
        """Convert ``s`` into a 1-D ``torch.long`` tensor of vocabulary ids.

        Args:
            s: Iterable of symbols, each of which must be in the vocabulary.

        Returns:
            A ``torch.long`` tensor with one id per element of ``s``.

        Raises:
            KeyError: If ``s`` contains a symbol absent from the vocabulary.
        """
        return torch.tensor([self.s_i[c] for c in s], dtype=torch.long)

    def decode(self, s):
        """Convert a sequence of ids back into a string.

        Args:
            s: Iterable of ids. Elements may be 0-d tensors (as produced by
                iterating a 1-D tensor) or plain Python ints.

        Returns:
            The concatenation of the symbols the ids map to.

        Raises:
            KeyError: If an id is not present in the vocabulary.
        """
        # Tensor elements expose .item(); plain ints (e.g. from
        # tensor.tolist()) do not and are used directly as keys.
        return ''.join(
            self.i_s[i.item() if hasattr(i, 'item') else i] for i in s
        )