balochi-tokenizer / tokenizer_config.json
hafeez007's picture
Add Balochi SentencePiece BPE tokenizer
958fa32 verified
{
  "add_prefix_space": true,
  "backend": "tokenizers",
  "bos_token": "<s>",
  "cls_token": "[CLS]",
  "eos_token": "</s>",
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "sp_model_kwargs": {
    "enable_sampling": false
  },
  "tokenizer_class": "XLMRobertaTokenizer",
  "unk_token": "[UNK]",
  "language": "bal",
  "keep_accents": true,
  "strip_accents": false,
  "do_lower_case": false
}