rohithsiddhartha committed on
Commit
efe85d9
·
verified ·
1 Parent(s): 3cad899

Update tokenizer_config.json

Browse files

Copied from https://huggingface.co/mlx-community/DeepSeek-V3-0324-4bit/raw/main/tokenizer_config.json,
because the current tokenizer config is returning
<|begin▁of▁sentence|><|User|>Write a story about a bird
instead of the expected
<|begin▁of▁sentence|><|User|>Write a story about a bird<|Assistant|>

Files changed (1) hide show
  1. tokenizer_config.json +9 -9
tokenizer_config.json CHANGED
@@ -6412,20 +6412,20 @@
6412
  "special": true
6413
  },
6414
  "128798": {
6415
- "content": "<think>",
6416
  "lstrip": false,
6417
- "normalized": true,
6418
  "rstrip": false,
6419
  "single_word": false,
6420
- "special": false
6421
  },
6422
  "128799": {
6423
- "content": "</think>",
6424
  "lstrip": false,
6425
- "normalized": true,
6426
  "rstrip": false,
6427
  "single_word": false,
6428
- "special": false
6429
  },
6430
  "128800": {
6431
  "content": "<|fim▁hole|>",
@@ -6549,15 +6549,15 @@
6549
  }
6550
  },
6551
  "bos_token": "<|begin▁of▁sentence|>",
6552
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|><think>\\n'}}{% endif %}",
6553
  "clean_up_tokenization_spaces": false,
6554
  "eos_token": "<|end▁of▁sentence|>",
6555
  "extra_special_tokens": {},
6556
  "legacy": true,
6557
- "model_max_length": 16384,
6558
  "pad_token": "<|end▁of▁sentence|>",
6559
  "sp_model_kwargs": {},
6560
- "tokenizer_class": "LlamaTokenizer",
6561
  "unk_token": null,
6562
  "use_default_system_prompt": false
6563
  }
 
6412
  "special": true
6413
  },
6414
  "128798": {
6415
+ "content": "<|place▁holder▁no▁798|>",
6416
  "lstrip": false,
6417
+ "normalized": false,
6418
  "rstrip": false,
6419
  "single_word": false,
6420
+ "special": true
6421
  },
6422
  "128799": {
6423
+ "content": "<|place▁holder▁no▁799|>",
6424
  "lstrip": false,
6425
+ "normalized": false,
6426
  "rstrip": false,
6427
  "single_word": false,
6428
+ "special": true
6429
  },
6430
  "128800": {
6431
  "content": "<|fim▁hole|>",
 
6549
  }
6550
  },
6551
  "bos_token": "<|begin▁of▁sentence|>",
6552
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true, is_last_user=false) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\n\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{%- set ns.is_first = false -%}{%- set ns.is_last_user = true -%}{{'<|User|>' + message['content'] + '<|Assistant|>'}}{%- endif %}{%- if message['role'] == 'assistant' and message['tool_calls'] is defined and message['tool_calls'] is not none %}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{%- endif %}{%- set ns.is_first = false %}{%- set ns.is_tool = false -%}{%- set ns.is_output_first = true %}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\n' + '```json' + '\n' + tool['function']['arguments'] + '\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and (message['tool_calls'] is not defined or message['tool_calls'] is none)%}{%- set ns.is_last_user = false -%}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{{content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_last_user = false -%}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_last_user and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}",
6553
  "clean_up_tokenization_spaces": false,
6554
  "eos_token": "<|end▁of▁sentence|>",
6555
  "extra_special_tokens": {},
6556
  "legacy": true,
6557
+ "model_max_length": 131072,
6558
  "pad_token": "<|end▁of▁sentence|>",
6559
  "sp_model_kwargs": {},
6560
+ "tokenizer_class": "LlamaTokenizerFast",
6561
  "unk_token": null,
6562
  "use_default_system_prompt": false
6563
  }