hindi-embedding-foundational-model / tokenizer_config.json
DeepMostInnovations's picture
Upload Hindi embeddings model and all associated files
d5947d7 verified
{
"name": "hindi-tokenizer",
"version": "1.0.0",
"model_type": "sentencepiece",
"sp_model_type": "unigram",
"tokenizer_class": "SentencePieceTokenizer",
"bos_token": "<s>",
"eos_token": "</s>",
"pad_token": "<pad>",
"unk_token": "<unk>",
"mask_token": "<mask>",
"cls_token": "<cls>",
"sep_token": "<sep>",
"model_max_length": 512,
"vocab_size": 16000,
"do_lower_case": false,
"special_tokens_map": {
"pad_token": "<pad>",
"unk_token": "<unk>",
"bos_token": "<s>",
"eos_token": "</s>",
"mask_token": "<mask>",
"sep_token": "<sep>",
"cls_token": "<cls>"
},
"tokenizer_file": "tokenizer.model",
"auto_map": {
"AutoTokenizer": [
null,
"PreTrainedTokenizerFast"
]
}
}