distil-test / tokenizer_config.json
Godwinlyamba's picture
Upload folder using huggingface_hub
673da73 verified
{
"auto_map": {
"AutoTokenizer": [
"tokenization_kimi.TikTokenTokenizer",
null
]
},
"backend": "tokenizers",
"bos_token": "[BOS]",
"clean_up_tokenization_spaces": false,
"eos_token": "[EOS]",
"extra_special_tokens": [
"<|im_end|>",
"<|im_user|>",
"<|im_assistant|>",
"<|im_system|>",
"<|im_middle|>",
"<|media_start|>",
"<|media_content|>",
"<|media_end|>",
"<|media_pad|>"
],
"is_local": false,
"local_files_only": false,
"model_max_length": 1000000000000000019884624838656,
"pad_token": "[PAD]",
"tokenizer_class": "TokenizersBackend",
"unk_token": "[UNK]",
"chat_template": "{%- macro render_content(msg) -%}\n {%- set c = msg.get('content') -%}\n {%- if c is string -%}\n {{ c }}\n {%- elif c is not none -%}\n {% for content in c -%}\n {% if content['type'] == 'image' or content['type'] == 'image_url' -%}\n <|media_begin|>image<|media_content|><|media_pad|><|media_end|>\n {% elif content['type'] == 'video' or content['type']== 'video_url'-%}\n <|kimi_k25_video_placeholder|>\n {% else -%}\n {{ content['text'] }}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n{%- endmacro -%}\n\n{% macro set_roles(message) -%}\n {%- set role_name = message.get('name') or message['role'] -%}\n {%- if message['role'] == 'user' -%}\n <|im_user|>{{role_name}}<|im_middle|>\n {%- elif message['role'] == 'assistant' -%}\n <|im_assistant|>{{role_name}}<|im_middle|>\n {%- else -%}\n <|im_system|>{{role_name}}<|im_middle|>\n {%- endif -%}\n{%- endmacro -%}\n\n\n{%- macro render_toolcalls(message) -%}\n <|tool_calls_section_begin|>\n {%- for tool_call in message['tool_calls'] -%}\n {%- set formatted_id = tool_call['id'] -%}\n <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{% if tool_call['function']['arguments'] is string %}{{ tool_call['function']['arguments'] }}{% else %}{{ tool_call['function']['arguments'] | tojson }}{% endif %}<|tool_call_end|>\n {%- endfor -%}\n <|tool_calls_section_end|>\n{%- endmacro -%}\n\n\n{%- set preserve_thinking = preserve_thinking | default(false) -%}\n{# Find last non-tool-call assistant message. If preserve_thinking, keep -1 so hist is empty and all msgs use suffix (retain reasoning). #}\n{%- set ns = namespace(last_non_tool_call_assistant_msg=-1) -%}\n{%- if not preserve_thinking -%}\n{%- for idx in range(messages|length-1, -1, -1) -%}\n {%- if messages[idx]['role'] == 'assistant' and not messages[idx].get('tool_calls') -%}\n {%- set ns.last_non_tool_call_assistant_msg = idx -%}\n {%- break -%}\n {%- endif -%}\n{%- endfor -%}\n{%- endif -%}\n\n{# split all messages into history & suffix, reasoning_content in suffix should be reserved.#}\n{%- set hist_msgs = messages[:ns.last_non_tool_call_assistant_msg+1] -%}\n{%- set suffix_msgs = messages[ns.last_non_tool_call_assistant_msg+1:] -%}\n\n{%- if tools -%}\n {%- if tools_ts_str -%}\n <|im_system|>tool_declare<|im_middle|>{{ tools_ts_str }}<|im_end|>\n {%- else -%}\n <|im_system|>tool_declare<|im_middle|>{{ tools | tojson(separators=(',', ':')) }}<|im_end|>\n {%- endif -%}\n{%- endif -%}\n\n \n{%- for message in hist_msgs -%}\n {{set_roles(message)}}\n {%- if message['role'] == 'assistant' -%}\n <think></think>{{render_content(message)}}\n {%- if message.get('tool_calls') -%}\n {{render_toolcalls(message)}}\n {%- endif -%}\n {%- elif message['role'] == 'tool' -%}\n {%- set tool_call_id = message.tool_call_id -%}\n ## Return of {{ tool_call_id }}\n{{render_content(message)}}\n {%- elif message['content'] is not none -%}\n {{render_content(message)}}\n {%- endif -%}\n <|im_end|>\n{%- endfor -%}\n\n{%- for message in suffix_msgs -%}\n {{set_roles(message)}}\n {%- if message['role'] == 'assistant' -%}\n {%- if thinking is defined and thinking is false and preserve_thinking is false -%}\n <think></think>{{render_content(message)}}\n {%- else -%}\n {%- set rc = message.get('reasoning', message.get('reasoning_content', '')) -%}\n <think>{{rc}}</think>{{render_content(message)}}\n {%- endif -%}\n {%- if message.get('tool_calls') -%}\n {{render_toolcalls(message)}}\n {%- endif -%}\n {%- elif message['role'] == 'tool' -%}\n {%- set tool_call_id = message.tool_call_id -%}\n ## Return of {{ tool_call_id }}\n{{render_content(message)}}\n {%- elif message['content'] is not none -%}\n {{render_content(message)}}\n {%- endif -%}\n <|im_end|>\n{%- endfor -%}\n\n\n{%- if add_generation_prompt -%}\n <|im_assistant|>assistant<|im_middle|>\n {%- if thinking is defined and thinking is false -%}\n <think></think>\n {%- else -%}\n <think>\n {%- endif -%}\n{%- endif -%}"
}