thetmon committed on
Commit
fa2a952
·
verified ·
1 Parent(s): 69e7c3c

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +18 -17
tokenizer_config.json CHANGED
@@ -5,25 +5,26 @@
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|im_end|>",
7
  "errors": "replace",
8
- "extra_special_tokens": [
9
- "<|im_start|>",
10
- "<|im_end|>",
11
- "<|object_ref_start|>",
12
- "<|object_ref_end|>",
13
- "<|box_start|>",
14
- "<|box_end|>",
15
- "<|quad_start|>",
16
- "<|quad_end|>",
17
- "<|vision_start|>",
18
- "<|vision_end|>",
19
- "<|vision_pad|>",
20
- "<|image_pad|>",
21
- "<|video_pad|>"
22
- ],
23
  "is_local": false,
24
  "model_max_length": 1010000,
25
  "pad_token": "<|endoftext|>",
26
  "split_special_tokens": false,
27
  "tokenizer_class": "Qwen2Tokenizer",
28
- "unk_token": null
29
- }
 
 
5
  "clean_up_tokenization_spaces": false,
6
  "eos_token": "<|im_end|>",
7
  "errors": "replace",
8
+ "extra_special_tokens": {
9
+ "<|im_start|>": "<|im_start|>",
10
+ "<|im_end|>": "<|im_end|>",
11
+ "<|object_ref_start|>": "<|object_ref_start|>",
12
+ "<|object_ref_end|>": "<|object_ref_end|>",
13
+ "<|box_start|>": "<|box_start|>",
14
+ "<|box_end|>": "<|box_end|>",
15
+ "<|quad_start|>": "<|quad_start|>",
16
+ "<|quad_end|>": "<|quad_end|>",
17
+ "<|vision_start|>": "<|vision_start|>",
18
+ "<|vision_end|>": "<|vision_end|>",
19
+ "<|vision_pad|>": "<|vision_pad|>",
20
+ "<|image_pad|>": "<|image_pad|>",
21
+ "<|video_pad|>": "<|video_pad|>"
22
+ },
23
  "is_local": false,
24
  "model_max_length": 1010000,
25
  "pad_token": "<|endoftext|>",
26
  "split_special_tokens": false,
27
  "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null,
29
+ "chat_template": "{%- if tools %}{{- '<|im_start|>system\\n' }}{%- if messages[0].role == 'system' %}{{- messages[0].content + '\\n\\n' }}{%- endif %}{{- \"You are Qwen, made by Alibaba Cloud. You are a helpful assistant./no_think\\n\\n\" }}{{- '<tools>\\n' }}{%- for tool in tools %}{{- tool | tojson + '\\n' }}{%- endfor %}{{- '</tools>\\n\\n' }}{{- '<tool_call>\\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\\n</tool_call><|im_end|>\\n' }}{%- else %}{%- if messages[0].role == 'system' %}{{- '<|im_start|>system\\n' + messages[0].content + '/no_think<|im_end|>\\n' }}{%- else %}{{- '<|im_start|>system\\nYou are Qwen, made by Alibaba Cloud. You are a helpful assistant./no_think<|im_end|>\\n' }}{%- endif %}{%- endif %}{%- for message in messages %}{%- if message.role == 'user' %}{%- set content = message.content %}{%- set fl = content.split('\\n')[0].lower() %}{%- set formats = ['toml', 'xml', 'yaml', 'json', 'csv'] %}{%- set ns = namespace(target='', found=false) %}{%- set words = fl.split(' ') %}{%- for word in words | reverse %}{%- if not ns.found %}{%- for f in formats %}{%- if f in word and not ns.found %}{%- set ns.target = f %}{%- set ns.found = true %}{%- endif %}{%- endfor %}{%- endif %}{%- endfor %}{%- if ns.target == 'json' %}{%- set suffix = '\\n\\nOutput ONLY raw JSON. No markdown code fences. No explanation or prefix text. Do not output TOML, YAML, or any other format.' %}{%- elif ns.target == 'toml' %}{%- set suffix = '\\n\\nOutput ONLY raw TOML. No markdown code fences. No explanation or prefix text. Use [table] for nested objects and [[array]] for arrays of tables.' %}{%- elif ns.target == 'xml' %}{%- set suffix = '\\n\\nOutput ONLY raw XML. No markdown code fences. No explanation or prefix text. Use full descriptive tag names (not abbreviations like <n> or <s>). Escape & as &amp;.' %}{%- elif ns.target == 'yaml' %}{%- set suffix = '\\n\\nOutput ONLY raw YAML. No markdown code fences. No explanation or prefix text. Use proper indentation with 2 spaces.' %}{%- elif ns.target == 'csv' %}{%- set suffix = '\\n\\nOutput ONLY raw CSV. No markdown code fences. No explanation or prefix text. Include a header row.' %}{%- else %}{%- set suffix = '\\n\\nOutput ONLY the requested format. No markdown code fences. No explanation or prefix text.' %}{%- endif %}{{- '<|im_start|>user\\n' + content + suffix + '<|im_end|>\\n' }}{%- elif message.role == 'assistant' %}{{- '<|im_start|>assistant\\n' + message.content + '<|im_end|>\\n' }}{%- elif message.role != 'system' %}{{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>\\n' }}{%- endif %}{%- endfor %}{%- if add_generation_prompt %}{{- '<|im_start|>assistant\\n' }}{%- endif %}"
30
+ }