roskosmos19 committed on
Commit
1bfa4ea
·
verified ·
1 Parent(s): 9056ae5

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +50 -50
tokenizer_config.json CHANGED
@@ -3,7 +3,7 @@
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
  "151643": {
6
- "content": "||<|endoftext|>",
7
  "lstrip": false,
8
  "normalized": false,
9
  "rstrip": false,
@@ -11,7 +11,7 @@
11
  "special": true
12
  },
13
  "151644": {
14
- "content": "||<|im_start|>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
@@ -19,7 +19,7 @@
19
  "special": true
20
  },
21
  "151645": {
22
- "content": "||<|eos|>",
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
@@ -27,7 +27,7 @@
27
  "special": true
28
  },
29
  "151646": {
30
- "content": "||<|object_ref_start|>",
31
  "lstrip": false,
32
  "normalized": false,
33
  "rstrip": false,
@@ -35,7 +35,7 @@
35
  "special": true
36
  },
37
  "151647": {
38
- "content": "||<|object_ref_end|>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
@@ -43,7 +43,7 @@
43
  "special": true
44
  },
45
  "151648": {
46
- "content": "||<|box_start|>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
@@ -51,7 +51,7 @@
51
  "special": true
52
  },
53
  "151649": {
54
- "content": "||<|box_end|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
@@ -59,7 +59,7 @@
59
  "special": true
60
  },
61
  "151650": {
62
- "content": "||<|quad_start|>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
@@ -67,7 +67,7 @@
67
  "special": true
68
  },
69
  "151651": {
70
- "content": "||<|quad_end|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
@@ -75,7 +75,7 @@
75
  "special": true
76
  },
77
  "151652": {
78
- "content": "||<|vision_start|>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
@@ -83,7 +83,7 @@
83
  "special": true
84
  },
85
  "151653": {
86
- "content": "||<|vision_end|>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
@@ -91,7 +91,7 @@
91
  "special": true
92
  },
93
  "151654": {
94
- "content": "||<|vision_pad|>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
@@ -99,7 +99,7 @@
99
  "special": true
100
  },
101
  "151655": {
102
- "content": "||<|image_pad|>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
@@ -107,7 +107,7 @@
107
  "special": true
108
  },
109
  "151656": {
110
- "content": "||<|video_pad|>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
@@ -131,7 +131,7 @@
131
  "special": false
132
  },
133
  "151659": {
134
- "content": "||<|fim_prefix|>",
135
  "lstrip": false,
136
  "normalized": false,
137
  "rstrip": false,
@@ -139,7 +139,7 @@
139
  "special": false
140
  },
141
  "151660": {
142
- "content": "||<|fim_middle|>",
143
  "lstrip": false,
144
  "normalized": false,
145
  "rstrip": false,
@@ -147,7 +147,7 @@
147
  "special": false
148
  },
149
  "151661": {
150
- "content": "||<|fim_suffix|>",
151
  "lstrip": false,
152
  "normalized": false,
153
  "rstrip": false,
@@ -155,7 +155,7 @@
155
  "special": false
156
  },
157
  "151662": {
158
- "content": "||<|fim_pad|>",
159
  "lstrip": false,
160
  "normalized": false,
161
  "rstrip": false,
@@ -163,7 +163,7 @@
163
  "special": false
164
  },
165
  "151663": {
166
- "content": "||<|repo_name|>",
167
  "lstrip": false,
168
  "normalized": false,
169
  "rstrip": false,
@@ -171,7 +171,7 @@
171
  "special": false
172
  },
173
  "151664": {
174
- "content": "||<|file_sep|>",
175
  "lstrip": false,
176
  "normalized": false,
177
  "rstrip": false,
@@ -195,7 +195,7 @@
195
  "special": false
196
  },
197
  "151667": {
198
- "content": "||<|thinking|>",
199
  "lstrip": false,
200
  "normalized": false,
201
  "rstrip": false,
@@ -203,7 +203,7 @@
203
  "special": false
204
  },
205
  "151668": {
206
- "content": "||<|thinking_end|>",
207
  "lstrip": false,
208
  "normalized": false,
209
  "rstrip": false,
@@ -211,7 +211,7 @@
211
  "special": false
212
  },
213
  "151669": {
214
- "content": "||<|think_start|>",
215
  "lstrip": false,
216
  "normalized": false,
217
  "rstrip": false,
@@ -219,7 +219,7 @@
219
  "special": true
220
  },
221
  "151670": {
222
- "content": "||<|think_end|>",
223
  "lstrip": false,
224
  "normalized": false,
225
  "rstrip": false,
@@ -227,7 +227,7 @@
227
  "special": true
228
  },
229
  "151671": {
230
- "content": "||<|review_start|>",
231
  "lstrip": false,
232
  "normalized": false,
233
  "rstrip": false,
@@ -235,7 +235,7 @@
235
  "special": true
236
  },
237
  "151672": {
238
- "content": "||<|review_end|>",
239
  "lstrip": false,
240
  "normalized": false,
241
  "rstrip": false,
@@ -243,7 +243,7 @@
243
  "special": true
244
  },
245
  "151673": {
246
- "content": "||<|final_start|>",
247
  "lstrip": false,
248
  "normalized": false,
249
  "rstrip": false,
@@ -251,7 +251,7 @@
251
  "special": true
252
  },
253
  "151674": {
254
- "content": "||<|final_end|>",
255
  "lstrip": false,
256
  "normalized": false,
257
  "rstrip": false,
@@ -260,34 +260,34 @@
260
  }
261
  },
262
  "additional_special_tokens": [
263
- "||<|im_start|>",
264
- "||<|eos|>",
265
- "||<|object_ref_start|>",
266
- "||<|object_ref_end|>",
267
- "||<|box_start|>",
268
- "||<|box_end|>",
269
- "||<|quad_start|>",
270
- "||<|quad_end|>",
271
- "||<|vision_start|>",
272
- "||<|vision_end|>",
273
- "||<|vision_pad|>",
274
- "||<|image_pad|>",
275
- "||<|video_pad|>",
276
- "||<|think_start|>",
277
- "||<|think_end|>",
278
- "||<|review_start|>",
279
- "||<|review_end|>",
280
- "||<|final_start|>",
281
- "||<|final_end|>"
282
  ],
283
  "bos_token": null,
284
- "chat_template": null,
285
  "clean_up_tokenization_spaces": false,
286
- "eos_token": "||<|eos|>",
287
  "errors": "replace",
288
  "extra_special_tokens": {},
289
  "model_max_length": 262144,
290
- "pad_token": "||<|endoftext|>",
291
  "split_special_tokens": false,
292
  "tokenizer_class": "Qwen2Tokenizer",
293
  "unk_token": null
 
3
  "add_prefix_space": false,
4
  "added_tokens_decoder": {
5
  "151643": {
6
+ "content": "||<|<|endoftext|>",
7
  "lstrip": false,
8
  "normalized": false,
9
  "rstrip": false,
 
11
  "special": true
12
  },
13
  "151644": {
14
+ "content": "||<|<|im_start|>",
15
  "lstrip": false,
16
  "normalized": false,
17
  "rstrip": false,
 
19
  "special": true
20
  },
21
  "151645": {
22
+ "content": "||<|<|eos|>",
23
  "lstrip": false,
24
  "normalized": false,
25
  "rstrip": false,
 
27
  "special": true
28
  },
29
  "151646": {
30
+ "content": "||<|<|object_ref_start|>",
31
  "lstrip": false,
32
  "normalized": false,
33
  "rstrip": false,
 
35
  "special": true
36
  },
37
  "151647": {
38
+ "content": "||<|<|object_ref_end|>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
 
43
  "special": true
44
  },
45
  "151648": {
46
+ "content": "||<|<|box_start|>",
47
  "lstrip": false,
48
  "normalized": false,
49
  "rstrip": false,
 
51
  "special": true
52
  },
53
  "151649": {
54
+ "content": "||<|<|box_end|>",
55
  "lstrip": false,
56
  "normalized": false,
57
  "rstrip": false,
 
59
  "special": true
60
  },
61
  "151650": {
62
+ "content": "||<|<|quad_start|>",
63
  "lstrip": false,
64
  "normalized": false,
65
  "rstrip": false,
 
67
  "special": true
68
  },
69
  "151651": {
70
+ "content": "||<|<|quad_end|>",
71
  "lstrip": false,
72
  "normalized": false,
73
  "rstrip": false,
 
75
  "special": true
76
  },
77
  "151652": {
78
+ "content": "||<|<|vision_start|>",
79
  "lstrip": false,
80
  "normalized": false,
81
  "rstrip": false,
 
83
  "special": true
84
  },
85
  "151653": {
86
+ "content": "||<|<|vision_end|>",
87
  "lstrip": false,
88
  "normalized": false,
89
  "rstrip": false,
 
91
  "special": true
92
  },
93
  "151654": {
94
+ "content": "||<|<|vision_pad|>",
95
  "lstrip": false,
96
  "normalized": false,
97
  "rstrip": false,
 
99
  "special": true
100
  },
101
  "151655": {
102
+ "content": "||<|<|image_pad|>",
103
  "lstrip": false,
104
  "normalized": false,
105
  "rstrip": false,
 
107
  "special": true
108
  },
109
  "151656": {
110
+ "content": "||<|<|video_pad|>",
111
  "lstrip": false,
112
  "normalized": false,
113
  "rstrip": false,
 
131
  "special": false
132
  },
133
  "151659": {
134
+ "content": "||<|<|fim_prefix|>",
135
  "lstrip": false,
136
  "normalized": false,
137
  "rstrip": false,
 
139
  "special": false
140
  },
141
  "151660": {
142
+ "content": "||<|<|fim_middle|>",
143
  "lstrip": false,
144
  "normalized": false,
145
  "rstrip": false,
 
147
  "special": false
148
  },
149
  "151661": {
150
+ "content": "||<|<|fim_suffix|>",
151
  "lstrip": false,
152
  "normalized": false,
153
  "rstrip": false,
 
155
  "special": false
156
  },
157
  "151662": {
158
+ "content": "||<|<|fim_pad|>",
159
  "lstrip": false,
160
  "normalized": false,
161
  "rstrip": false,
 
163
  "special": false
164
  },
165
  "151663": {
166
+ "content": "||<|<|repo_name|>",
167
  "lstrip": false,
168
  "normalized": false,
169
  "rstrip": false,
 
171
  "special": false
172
  },
173
  "151664": {
174
+ "content": "||<|<|file_sep|>",
175
  "lstrip": false,
176
  "normalized": false,
177
  "rstrip": false,
 
195
  "special": false
196
  },
197
  "151667": {
198
+ "content": "||<|<|thinking|>",
199
  "lstrip": false,
200
  "normalized": false,
201
  "rstrip": false,
 
203
  "special": false
204
  },
205
  "151668": {
206
+ "content": "||<|<|thinking_end|>",
207
  "lstrip": false,
208
  "normalized": false,
209
  "rstrip": false,
 
211
  "special": false
212
  },
213
  "151669": {
214
+ "content": "||<|<|think_start|>",
215
  "lstrip": false,
216
  "normalized": false,
217
  "rstrip": false,
 
219
  "special": true
220
  },
221
  "151670": {
222
+ "content": "||<|<|think_end|>",
223
  "lstrip": false,
224
  "normalized": false,
225
  "rstrip": false,
 
227
  "special": true
228
  },
229
  "151671": {
230
+ "content": "||<|<|review_start|>",
231
  "lstrip": false,
232
  "normalized": false,
233
  "rstrip": false,
 
235
  "special": true
236
  },
237
  "151672": {
238
+ "content": "||<|<|review_end|>",
239
  "lstrip": false,
240
  "normalized": false,
241
  "rstrip": false,
 
243
  "special": true
244
  },
245
  "151673": {
246
+ "content": "||<|<|final_start|>",
247
  "lstrip": false,
248
  "normalized": false,
249
  "rstrip": false,
 
251
  "special": true
252
  },
253
  "151674": {
254
+ "content": "||<|<|final_end|>",
255
  "lstrip": false,
256
  "normalized": false,
257
  "rstrip": false,
 
260
  }
261
  },
262
  "additional_special_tokens": [
263
+ "||<|<|im_start|>",
264
+ "||<|<|eos|>",
265
+ "||<|<|object_ref_start|>",
266
+ "||<|<|object_ref_end|>",
267
+ "||<|<|box_start|>",
268
+ "||<|<|box_end|>",
269
+ "||<|<|quad_start|>",
270
+ "||<|<|quad_end|>",
271
+ "||<|<|vision_start|>",
272
+ "||<|<|vision_end|>",
273
+ "||<|<|vision_pad|>",
274
+ "||<|<|image_pad|>",
275
+ "||<|<|video_pad|>",
276
+ "||<|<|think_start|>",
277
+ "||<|<|think_end|>",
278
+ "||<|<|review_start|>",
279
+ "||<|<|review_end|>",
280
+ "||<|<|final_start|>",
281
+ "||<|<|final_end|>"
282
  ],
283
  "bos_token": null,
284
+ "chat_template": "",
285
  "clean_up_tokenization_spaces": false,
286
+ "eos_token": "||<|<|eos|>",
287
  "errors": "replace",
288
  "extra_special_tokens": {},
289
  "model_max_length": 262144,
290
+ "pad_token": "||<|<|endoftext|>",
291
  "split_special_tokens": false,
292
  "tokenizer_class": "Qwen2Tokenizer",
293
  "unk_token": null