Datatest-460m / tokenizer_config.json
scottejin's picture
Upload Datatest-460m v1 (SFT step 4000)
91ce3b2 verified
raw
history blame contribute delete
693 Bytes
{
  "tokenizer_class": "NanochatTokenizer",
  "auto_map": {
    "AutoTokenizer": ["tokenization_nanochat.NanochatTokenizer", null]
  },
  "model_max_length": 7680,
  "bos_token": "<|bos|>",
  "eos_token": "<|assistant_end|>",
  "pad_token": "<|assistant_end|>",
  "clean_up_tokenization_spaces": false,
  "add_bos_token": false,
  "chat_template": "{{ '<|bos|>' }}{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|user_start|>' + message['content'] + '<|user_end|>' }}{% elif message['role'] == 'assistant' %}{{ '<|assistant_start|>' + message['content'] + '<|assistant_end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant_start|>' }}{% endif %}"
}