Monostich / tokenizer_config.json
kerzgrr's picture
Upload 8 files
28727e9 verified
{
  "tokenizer_class": "GPT2TokenizerFast",
  "tokenizer_file": "tokenizer.json",
  "model_max_length": 1024,
  "add_prefix_space": true,
  "clean_up_tokenization_spaces": false,
  "bos_token": "<|begin_of_text|>",
  "eos_token": "<|eot_id|>",
  "eot_token": "<|eot_id|>",
  "pad_token": "<|pad|>",
  "unk_token": "<|unk|>",
  "additional_special_tokens": [
    "<|start_header_id|>",
    "<|end_header_id|>",
    "<|eot_id|>"
  ],
  "added_tokens_decoder": {
    "0": {
      "content": "<|pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|unk|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|begin_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<|end_of_text|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<|start_header_id|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "5": {
      "content": "<|end_header_id|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "6": {
      "content": "<|eot_id|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "chat_template": "{% for message in messages %}{% if loop.first %}<|begin_of_text|>{% endif %}<|start_header_id|>{{ message['role'] }}<|end_header_id|>\n\n{{ message['content'] }}<|eot_id|>{% endfor %}{% if add_generation_prompt %}<|start_header_id|>assistant<|end_header_id|>\n\n{% endif %}"
}