latishab committed on
Commit
6e55206
·
verified ·
1 Parent(s): ead106a

Upload tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|assistant|>": 49153,
3
+ "<|user|>": 49152
4
+ }
special_tokens_map.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>"
5
  ],
6
  "bos_token": {
7
  "content": "<|im_start|>",
 
1
  {
2
  "additional_special_tokens": [
3
+ "<|user|>",
4
+ "<|assistant|>"
5
  ],
6
  "bos_token": {
7
  "content": "<|im_start|>",
tokenizer.json CHANGED
@@ -167,6 +167,24 @@
167
  "rstrip": false,
168
  "normalized": false,
169
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  }
171
  ],
172
  "normalizer": null,
 
167
  "rstrip": false,
168
  "normalized": false,
169
  "special": true
170
+ },
171
+ {
172
+ "id": 49152,
173
+ "content": "<|user|>",
174
+ "single_word": false,
175
+ "lstrip": false,
176
+ "rstrip": false,
177
+ "normalized": false,
178
+ "special": true
179
+ },
180
+ {
181
+ "id": 49153,
182
+ "content": "<|assistant|>",
183
+ "single_word": false,
184
+ "lstrip": false,
185
+ "rstrip": false,
186
+ "normalized": false,
187
+ "special": true
188
  }
189
  ],
190
  "normalizer": null,
tokenizer_config.json CHANGED
@@ -136,18 +136,34 @@
136
  "rstrip": false,
137
  "single_word": false,
138
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  },
141
  "additional_special_tokens": [
142
- "<|im_start|>",
143
- "<|im_end|>"
144
  ],
145
  "bos_token": "<|im_start|>",
146
- "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
147
  "clean_up_tokenization_spaces": false,
148
  "eos_token": "<|im_end|>",
149
  "extra_special_tokens": {},
150
- "max_length": 128,
151
  "model_max_length": 8192,
152
  "pad_to_multiple_of": null,
153
  "pad_token": "<|im_end|>",
 
136
  "rstrip": false,
137
  "single_word": false,
138
  "special": true
139
+ },
140
+ "49152": {
141
+ "content": "<|user|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "49153": {
149
+ "content": "<|assistant|>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
  }
156
  },
157
  "additional_special_tokens": [
158
+ "<|user|>",
159
+ "<|assistant|>"
160
  ],
161
  "bos_token": "<|im_start|>",
162
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful AI assistant named SmolLM, trained by Hugging Face<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
163
  "clean_up_tokenization_spaces": false,
164
  "eos_token": "<|im_end|>",
165
  "extra_special_tokens": {},
166
+ "max_length": 8192,
167
  "model_max_length": 8192,
168
  "pad_to_multiple_of": null,
169
  "pad_token": "<|im_end|>",