{
  "tokenizer_class": "PreTrainedTokenizerFast",
  "model_type": "BPE",
  "vocab_size": 50000,
  "language": "kn",
  "special_tokens": {
    "pad_token": "[PAD]",
    "unk_token": "[UNK]",
    "cls_token": "[CLS]",
    "sep_token": "[SEP]",
    "mask_token": "[MASK]"
  }
}