ve_fvt_deepseek_racket / tokenizer_config.json
datapaf's picture
Upload tokenizer
78bf8d0 verified
{
"add_bos_token": true,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"39861": {
"content": "õ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39862": {
"content": "÷",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39863": {
"content": "Á",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39864": {
"content": "ý",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39865": {
"content": "À",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39866": {
"content": "ÿ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39867": {
"content": "ø",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39868": {
"content": "ú",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39869": {
"content": "þ",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39870": {
"content": "ü",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39871": {
"content": "ù",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39872": {
"content": "ö",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39873": {
"content": "û",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39874": {
"content": "<|begin▁of▁sentence|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"39875": {
"content": "<|end▁of▁sentence|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": true
},
"39876": {
"content": "<|fim▁hole|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39877": {
"content": "<|fim▁begin|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39878": {
"content": "<|fim▁end|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39879": {
"content": "<pad>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39880": {
"content": "<|User|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39881": {
"content": "<|Assistant|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
},
"39882": {
"content": "<|EOT|>",
"lstrip": false,
"normalized": true,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<|begin▁of▁sentence|>",
"clean_up_tokenization_spaces": false,
"eos_token": "<|end▁of▁sentence|>",
"extra_special_tokens": {},
"legacy": true,
"model_max_length": 16384,
"pad_token": "<|end▁of▁sentence|>",
"sp_model_kwargs": {},
"tokenizer_class": "LlamaTokenizerFast",
"unk_token": null,
"use_default_system_prompt": false
}