gemma-7b-it-sindhi-tokenizer / tokenizer_config.json
Kashif786's picture
Add 20k Sindhi unigram tokens to Gemma-7B-it base for thesis research
780c97c verified
raw
history blame contribute delete
489 Bytes
{
"backend": "tokenizers",
"bos_token": "<bos>",
"clean_up_tokenization_spaces": false,
"eos_token": "<eos>",
"extra_special_tokens": [
"<start_of_turn>",
"<end_of_turn>"
],
"is_local": false,
"mask_token": "<mask>",
"model_max_length": 1000000000000000019884624838656,
"pad_token": "<pad>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "GemmaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": false
}