from transformers import PreTrainedTokenizer

# Maps an Auto* class name to the "<module>.<class>" path of this tokenizer.
# NOTE(review): presumably consumed by the Hugging Face auto-class /
# remote-code loading machinery -- confirm against the model repo config.
AUTO_MAP = {
    "AutoTokenizer": "tokenization_makemore.MakemoreTokenizer"
}


class MakemoreTokenizer(PreTrainedTokenizer):
    """Character-level tokenizer over '.' plus the lowercase letters a-z.

    The vocabulary is fixed and built in code: '.' has id 0 and the letters
    'a'..'z' have ids 1..26. Input text is lowercased and split into single
    characters; any character outside the vocabulary is mapped to id 0.
    """

    def __init__(self, **kwargs):
        """Build the fixed lookup tables, then initialise the base class.

        Args:
            **kwargs: Forwarded unchanged to ``PreTrainedTokenizer.__init__``.
        """
        # The tables are created before calling the base initialiser.
        # NOTE(review): presumably the base class may query the vocabulary
        # during its own initialisation, so keep this ordering -- confirm.
        self._stoi = {'.': 0, **{chr(ord('a') + i): i + 1 for i in range(26)}}
        self._itos = {v: k for k, v in self._stoi.items()}
        super().__init__(**kwargs)

    @property
    def vocab_size(self):
        """Number of entries in the fixed vocabulary (27)."""
        # Derived from the table so the two can never drift apart.
        return len(self._stoi)

    def get_vocab(self):
        """Return a copy of the token-to-id mapping."""
        # A copy is returned so callers cannot mutate the internal table.
        return dict(self._stoi)

    def _tokenize(self, text):
        """Lowercase ``text`` and split it into single-character tokens."""
        return list(text.lower())

    def _convert_token_to_id(self, token):
        """Return the id for ``token``; unknown tokens map to 0 (the '.' id)."""
        return self._stoi.get(token, 0)

    def _convert_id_to_token(self, index):
        """Return the token for ``index``; unknown ids map to '.'."""
        return self._itos.get(index, '.')

    def convert_tokens_to_string(self, tokens):
        """Concatenate single-character tokens back into a string.

        The inherited implementation joins tokens with a space, which for a
        character-level vocabulary would turn "emma" into "e m m a" when
        decoding. Tokens here are individual characters, so they are joined
        with no separator.

        Args:
            tokens: Iterable of token strings.

        Returns:
            The tokens concatenated in order.
        """
        return "".join(tokens)

    def save_vocabulary(self, save_directory, filename_prefix=None):
        """Write no files and return an empty tuple.

        The vocabulary is rebuilt in ``__init__`` from code, so there is no
        vocabulary file to save. Both arguments are accepted for interface
        compatibility and are not used.
        """
        return ()