Update tokenizer_config.json
Browse files
Updated from https://huggingface.co/mlx-community/DeepSeek-V3-0324-4bit/raw/main/tokenizer_config.json,
as the current chat template is returning
<|begin▁of▁sentence|><|User|>Write a story about a bird
instead of
<|begin▁of▁sentence|><|User|>Write a story about a bird<|Assistant|>
- tokenizer_config.json +9 -9
tokenizer_config.json
CHANGED
|
@@ -6412,20 +6412,20 @@
|
|
| 6412 |
"special": true
|
| 6413 |
},
|
| 6414 |
"128798": {
|
| 6415 |
-
"content": "
|
| 6416 |
"lstrip": false,
|
| 6417 |
-
"normalized":
|
| 6418 |
"rstrip": false,
|
| 6419 |
"single_word": false,
|
| 6420 |
-
"special":
|
| 6421 |
},
|
| 6422 |
"128799": {
|
| 6423 |
-
"content": "
|
| 6424 |
"lstrip": false,
|
| 6425 |
-
"normalized":
|
| 6426 |
"rstrip": false,
|
| 6427 |
"single_word": false,
|
| 6428 |
-
"special":
|
| 6429 |
},
|
| 6430 |
"128800": {
|
| 6431 |
"content": "<|fim▁hole|>",
|
|
@@ -6549,15 +6549,15 @@
|
|
| 6549 |
}
|
| 6550 |
},
|
| 6551 |
"bos_token": "<|begin▁of▁sentence|>",
|
| 6552 |
-
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['
|
| 6553 |
"clean_up_tokenization_spaces": false,
|
| 6554 |
"eos_token": "<|end▁of▁sentence|>",
|
| 6555 |
"extra_special_tokens": {},
|
| 6556 |
"legacy": true,
|
| 6557 |
-
"model_max_length":
|
| 6558 |
"pad_token": "<|end▁of▁sentence|>",
|
| 6559 |
"sp_model_kwargs": {},
|
| 6560 |
-
"tokenizer_class": "
|
| 6561 |
"unk_token": null,
|
| 6562 |
"use_default_system_prompt": false
|
| 6563 |
}
|
|
|
|
| 6412 |
"special": true
|
| 6413 |
},
|
| 6414 |
"128798": {
|
| 6415 |
+
"content": "<|place▁holder▁no▁798|>",
|
| 6416 |
"lstrip": false,
|
| 6417 |
+
"normalized": false,
|
| 6418 |
"rstrip": false,
|
| 6419 |
"single_word": false,
|
| 6420 |
+
"special": true
|
| 6421 |
},
|
| 6422 |
"128799": {
|
| 6423 |
+
"content": "<|place▁holder▁no▁799|>",
|
| 6424 |
"lstrip": false,
|
| 6425 |
+
"normalized": false,
|
| 6426 |
"rstrip": false,
|
| 6427 |
"single_word": false,
|
| 6428 |
+
"special": true
|
| 6429 |
},
|
| 6430 |
"128800": {
|
| 6431 |
"content": "<|fim▁hole|>",
|
|
|
|
| 6549 |
}
|
| 6550 |
},
|
| 6551 |
"bos_token": "<|begin▁of▁sentence|>",
|
| 6552 |
+
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 
'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
|
| 6553 |
"clean_up_tokenization_spaces": false,
|
| 6554 |
"eos_token": "<|end▁of▁sentence|>",
|
| 6555 |
"extra_special_tokens": {},
|
| 6556 |
"legacy": true,
|
| 6557 |
+
"model_max_length": 131072,
|
| 6558 |
"pad_token": "<|end▁of▁sentence|>",
|
| 6559 |
"sp_model_kwargs": {},
|
| 6560 |
+
"tokenizer_class": "LlamaTokenizerFast",
|
| 6561 |
"unk_token": null,
|
| 6562 |
"use_default_system_prompt": false
|
| 6563 |
}
|