vfinal-merged / tokenizer_config.json
thetmon's picture
Upload tokenizer_config.json with huggingface_hub
fa2a952 verified
{
"add_prefix_space": false,
"backend": "tokenizers",
"bos_token": null,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": {
"<|im_start|>": "<|im_start|>",
"<|im_end|>": "<|im_end|>",
"<|object_ref_start|>": "<|object_ref_start|>",
"<|object_ref_end|>": "<|object_ref_end|>",
"<|box_start|>": "<|box_start|>",
"<|box_end|>": "<|box_end|>",
"<|quad_start|>": "<|quad_start|>",
"<|quad_end|>": "<|quad_end|>",
"<|vision_start|>": "<|vision_start|>",
"<|vision_end|>": "<|vision_end|>",
"<|vision_pad|>": "<|vision_pad|>",
"<|image_pad|>": "<|image_pad|>",
"<|video_pad|>": "<|video_pad|>"
},
"is_local": false,
"model_max_length": 1010000,
"pad_token": "<|endoftext|>",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null,
"chat_template": "{%- if tools %}{{- '<|im_start|>system\\n' }}{%- if messages[0].role == 'system' %}{{- messages[0].content + '\\n\\n' }}{%- endif %}{{- \"You are Qwen, made by Alibaba Cloud. You are a helpful assistant./no_think\\n\\n\" }}{{- '<tools>\\n' }}{%- for tool in tools %}{{- tool | tojson + '\\n' }}{%- endfor %}{{- '</tools>\\n\\n' }}{{- '<tool_call>\\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\\n</tool_call><|im_end|>\\n' }}{%- else %}{%- if messages[0].role == 'system' %}{{- '<|im_start|>system\\n' + messages[0].content + '/no_think<|im_end|>\\n' }}{%- else %}{{- '<|im_start|>system\\nYou are Qwen, made by Alibaba Cloud. You are a helpful assistant./no_think<|im_end|>\\n' }}{%- endif %}{%- endif %}{%- for message in messages %}{%- if message.role == 'user' %}{%- set content = message.content %}{%- set fl = content.split('\\n')[0].lower() %}{%- set formats = ['toml', 'xml', 'yaml', 'json', 'csv'] %}{%- set ns = namespace(target='', found=false) %}{%- set words = fl.split(' ') %}{%- for word in words | reverse %}{%- if not ns.found %}{%- for f in formats %}{%- if f in word and not ns.found %}{%- set ns.target = f %}{%- set ns.found = true %}{%- endif %}{%- endfor %}{%- endif %}{%- endfor %}{%- if ns.target == 'json' %}{%- set suffix = '\\n\\nOutput ONLY raw JSON. No markdown code fences. No explanation or prefix text. Do not output TOML, YAML, or any other format.' %}{%- elif ns.target == 'toml' %}{%- set suffix = '\\n\\nOutput ONLY raw TOML. No markdown code fences. No explanation or prefix text. Use [table] for nested objects and [[array]] for arrays of tables.' %}{%- elif ns.target == 'xml' %}{%- set suffix = '\\n\\nOutput ONLY raw XML. No markdown code fences. No explanation or prefix text. Use full descriptive tag names (not abbreviations like <n> or <s>). Escape & as &amp;.' %}{%- elif ns.target == 'yaml' %}{%- set suffix = '\\n\\nOutput ONLY raw YAML. No markdown code fences. No explanation or prefix text. Use proper indentation with 2 spaces.' %}{%- elif ns.target == 'csv' %}{%- set suffix = '\\n\\nOutput ONLY raw CSV. No markdown code fences. No explanation or prefix text. Include a header row.' %}{%- else %}{%- set suffix = '\\n\\nOutput ONLY the requested format. No markdown code fences. No explanation or prefix text.' %}{%- endif %}{{- '<|im_start|>user\\n' + content + suffix + '<|im_end|>\\n' }}{%- elif message.role == 'assistant' %}{{- '<|im_start|>assistant\\n' + message.content + '<|im_end|>\\n' }}{%- elif message.role != 'system' %}{{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>\\n' }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{- '<|im_start|>assistant\\n' }}{%- endif %}"
}