{ "vocab_size": 50000, "corpus_file": "kannada_corpus.txt", "min_frequency": 1, "language": "Kannada (kn)", "pre_tokenizer": "Whitespace" }