danielhanchen committed on
Commit
be8abba
·
verified ·
1 Parent(s): 9c0df96

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +10 -11
tokenizer_config.json CHANGED
@@ -4,7 +4,7 @@
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "128000": {
7
- "content": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
@@ -12,7 +12,7 @@
12
  "special": true
13
  },
14
  "128001": {
15
- "content": "<\uff5cend\u2581of\u2581sentence\uff5c>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
@@ -92,7 +92,7 @@
92
  "special": true
93
  },
94
  "128011": {
95
- "content": "<\uff5cUser\uff5c>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
@@ -100,7 +100,7 @@
100
  "special": false
101
  },
102
  "128012": {
103
- "content": "<\uff5cAssistant\uff5c>",
104
  "lstrip": false,
105
  "normalized": false,
106
  "rstrip": false,
@@ -124,7 +124,7 @@
124
  "special": false
125
  },
126
  "128015": {
127
- "content": "<\uff5c\u2581pad\u2581\uff5c>",
128
  "lstrip": false,
129
  "normalized": false,
130
  "rstrip": false,
@@ -2052,17 +2052,16 @@
2052
  "special": true
2053
  }
2054
  },
2055
- "bos_token": "<\uff5cbegin\u2581of\u2581sentence\uff5c>",
2056
  "clean_up_tokenization_spaces": false,
2057
- "eos_token": "<\uff5cend\u2581of\u2581sentence\uff5c>",
2058
  "extra_special_tokens": {},
2059
  "legacy": true,
2060
- "model_max_length": 16384,
2061
  "pad_token": "<|finetune_right_pad_id|>",
2062
  "padding_side": "left",
2063
  "sp_model_kwargs": {},
2064
  "tokenizer_class": "LlamaTokenizerFast",
2065
  "unk_token": null,
2066
- "use_default_system_prompt": false,
2067
- "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<\uff5cUser\uff5c>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<\uff5cAssistant\uff5c><\uff5ctool\u2581calls\u2581begin\uff5c><\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<\uff5ctool\u2581call\u2581begin\uff5c>' + tool['type'] + '<\uff5ctool\u2581sep\uff5c>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<\uff5ctool\u2581call\u2581end\uff5c>'}}{{'<\uff5ctool\u2581calls\u2581end\uff5c><\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>' + message['content'] + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '</think>' in content %}{% set content = content.split('</think>')[-1] %}{% endif %}{{'<\uff5cAssistant\uff5c>' + content + '<\uff5cend\u2581of\u2581sentence\uff5c>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first 
%}{{'<\uff5ctool\u2581outputs\u2581begin\uff5c><\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<\uff5ctool\u2581output\u2581begin\uff5c>' + message['content'] + '<\uff5ctool\u2581output\u2581end\uff5c>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<\uff5ctool\u2581outputs\u2581end\uff5c>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<\uff5cAssistant\uff5c><think>\\n'}}{% endif %}"
2068
- }
 
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "128000": {
7
+ "content": "<|begin▁of▁sentence|>",
8
  "lstrip": false,
9
  "normalized": false,
10
  "rstrip": false,
 
12
  "special": true
13
  },
14
  "128001": {
15
+ "content": "<|end▁of▁sentence|>",
16
  "lstrip": false,
17
  "normalized": false,
18
  "rstrip": false,
 
92
  "special": true
93
  },
94
  "128011": {
95
+ "content": "<|User|>",
96
  "lstrip": false,
97
  "normalized": false,
98
  "rstrip": false,
 
100
  "special": false
101
  },
102
  "128012": {
103
+ "content": "<|Assistant|>",
104
  "lstrip": false,
105
  "normalized": false,
106
  "rstrip": false,
 
124
  "special": false
125
  },
126
  "128015": {
127
+ "content": "<|▁pad▁|>",
128
  "lstrip": false,
129
  "normalized": false,
130
  "rstrip": false,
 
2052
  "special": true
2053
  }
2054
  },
2055
+ "bos_token": "<|begin▁of▁sentence|>",
2056
  "clean_up_tokenization_spaces": false,
2057
+ "eos_token": "<|end▁of▁sentence|>",
2058
  "extra_special_tokens": {},
2059
  "legacy": true,
2060
+ "model_max_length": 131072,
2061
  "pad_token": "<|finetune_right_pad_id|>",
2062
  "padding_side": "left",
2063
  "sp_model_kwargs": {},
2064
  "tokenizer_class": "LlamaTokenizerFast",
2065
  "unk_token": null,
2066
+ "use_default_system_prompt": false
2067
+ }