wenhuach committed on
Commit
b200ba8
·
verified ·
1 Parent(s): ce52056

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
chat_template.jinja ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [gMASK]<sop>
2
+ {%- if tools -%}
3
+ <|system|>
4
+ # Tools
5
+
6
+ You may call one or more functions to assist with the user query.
7
+
8
+ You are provided with function signatures within <tools></tools> XML tags:
9
+ <tools>
10
+ {% for tool in tools %}
11
+ {{ tool | tojson(ensure_ascii=False) }}
12
+ {% endfor %}
13
+ </tools>
14
+
15
+ For each function call, output the function name and arguments within the following XML format:
16
+ <tool_call>{function-name}<arg_key>{arg-key-1}</arg_key><arg_value>{arg-value-1}</arg_value><arg_key>{arg-key-2}</arg_key><arg_value>{arg-value-2}</arg_value>...</tool_call>{%- endif -%}
17
+ {%- macro visible_text(content) -%}
18
+ {%- if content is string -%}
19
+ {{- content }}
20
+ {%- elif content is iterable and content is not mapping -%}
21
+ {%- for item in content -%}
22
+ {%- if item is mapping and item.type == 'text' -%}
23
+ {{- item.text }}
24
+ {%- elif item is string -%}
25
+ {{- item }}
26
+ {%- endif -%}
27
+ {%- endfor -%}
28
+ {%- else -%}
29
+ {{- content }}
30
+ {%- endif -%}
31
+ {%- endmacro -%}
32
+ {%- set ns = namespace(last_user_index=-1) %}
33
+ {%- for m in messages %}
34
+ {%- if m.role == 'user' %}
35
+ {% set ns.last_user_index = loop.index0 -%}
36
+ {%- endif %}
37
+ {%- endfor %}
38
+ {% for m in messages %}
39
+ {%- if m.role == 'user' -%}<|user|>{{ visible_text(m.content) }}
40
+ {%- elif m.role == 'assistant' -%}
41
+ <|assistant|>
42
+ {%- set reasoning_content = '' %}
43
+ {%- set content = visible_text(m.content) %}
44
+ {%- if m.reasoning_content is string %}
45
+ {%- set reasoning_content = m.reasoning_content %}
46
+ {%- else %}
47
+ {%- if '</think>' in content %}
48
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
49
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
50
+ {%- endif %}
51
+ {%- endif %}
52
+ {%- if ((clear_thinking is defined and not clear_thinking) or loop.index0 > ns.last_user_index) and reasoning_content -%}
53
+ {{ '<think>' + reasoning_content.strip() + '</think>'}}
54
+ {%- else -%}
55
+ {{ '</think>' }}
56
+ {%- endif -%}
57
+ {%- if content.strip() -%}
58
+ {{ content.strip() }}
59
+ {%- endif -%}
60
+ {% if m.tool_calls %}
61
+ {% for tc in m.tool_calls %}
62
+ {%- if tc.function %}
63
+ {%- set tc = tc.function %}
64
+ {%- endif %}
65
+ {{- '<tool_call>' + tc.name -}}
66
+ {% set _args = tc.arguments %}{% for k, v in _args.items() %}<arg_key>{{ k }}</arg_key><arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>{% endfor %}</tool_call>{% endfor %}
67
+ {% endif %}
68
+ {%- elif m.role == 'tool' -%}
69
+ {%- if m.content is string -%}
70
+ {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
71
+ {{- '<|observation|>' }}
72
+ {%- endif %}
73
+ {{- '<tool_response>' }}
74
+ {{- m.content }}
75
+ {{- '</tool_response>' }}
76
+ {%- else -%}
77
+ <|observation|>{% for tr in m.content %}
78
+ <tool_response>{{ tr.output if tr.output is defined else tr }}</tool_response>{% endfor -%}
79
+ {% endif -%}
80
+ {%- elif m.role == 'system' -%}
81
+ <|system|>{{ visible_text(m.content) }}
82
+ {%- endif -%}
83
+ {%- endfor -%}
84
+ {%- if add_generation_prompt -%}
85
+ <|assistant|>{{- '</think>' if (enable_thinking is defined and not enable_thinking) else '<think>' -}}
86
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,679 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Glm4MoeLiteForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": [
10
+ 154820,
11
+ 154827,
12
+ 154829
13
+ ],
14
+ "first_k_dense_replace": 1,
15
+ "head_dim": 64,
16
+ "hidden_act": "silu",
17
+ "hidden_size": 2048,
18
+ "initializer_range": 0.02,
19
+ "intermediate_size": 10240,
20
+ "kv_lora_rank": 512,
21
+ "max_position_embeddings": 202752,
22
+ "mlp_layer_types": [
23
+ "dense",
24
+ "sparse",
25
+ "sparse",
26
+ "sparse",
27
+ "sparse",
28
+ "sparse",
29
+ "sparse",
30
+ "sparse",
31
+ "sparse",
32
+ "sparse",
33
+ "sparse",
34
+ "sparse",
35
+ "sparse",
36
+ "sparse",
37
+ "sparse",
38
+ "sparse",
39
+ "sparse",
40
+ "sparse",
41
+ "sparse",
42
+ "sparse",
43
+ "sparse",
44
+ "sparse",
45
+ "sparse",
46
+ "sparse",
47
+ "sparse",
48
+ "sparse",
49
+ "sparse",
50
+ "sparse",
51
+ "sparse",
52
+ "sparse",
53
+ "sparse",
54
+ "sparse",
55
+ "sparse",
56
+ "sparse",
57
+ "sparse",
58
+ "sparse",
59
+ "sparse",
60
+ "sparse",
61
+ "sparse",
62
+ "sparse",
63
+ "sparse",
64
+ "sparse",
65
+ "sparse",
66
+ "sparse",
67
+ "sparse",
68
+ "sparse",
69
+ "sparse"
70
+ ],
71
+ "model_type": "glm4_moe_lite",
72
+ "moe_intermediate_size": 1536,
73
+ "n_group": 1,
74
+ "n_routed_experts": 64,
75
+ "n_shared_experts": 1,
76
+ "norm_topk_prob": true,
77
+ "num_attention_heads": 20,
78
+ "num_experts_per_tok": 4,
79
+ "num_hidden_layers": 47,
80
+ "num_key_value_heads": 20,
81
+ "num_nextn_predict_layers": 1,
82
+ "pad_token_id": 154820,
83
+ "partial_rotary_factor": 1.0,
84
+ "pretraining_tp": 1,
85
+ "q_lora_rank": 768,
86
+ "qk_head_dim": 256,
87
+ "qk_nope_head_dim": 192,
88
+ "qk_rope_head_dim": 64,
89
+ "quantization_config": {
90
+ "autoround_version": "0.9.6",
91
+ "bits": 4,
92
+ "data_type": "int",
93
+ "extra_config": {
94
+ "model.layers.0.mlp.down_proj": {
95
+ "bits": 16,
96
+ "data_type": "float"
97
+ },
98
+ "model.layers.0.mlp.gate_proj": {
99
+ "bits": 16,
100
+ "data_type": "float"
101
+ },
102
+ "model.layers.0.mlp.up_proj": {
103
+ "bits": 16,
104
+ "data_type": "float"
105
+ },
106
+ "model.layers.1.mlp.shared_experts.down_proj": {
107
+ "bits": 16,
108
+ "data_type": "float"
109
+ },
110
+ "model.layers.1.mlp.shared_experts.gate_proj": {
111
+ "bits": 16,
112
+ "data_type": "float"
113
+ },
114
+ "model.layers.1.mlp.shared_experts.up_proj": {
115
+ "bits": 16,
116
+ "data_type": "float"
117
+ },
118
+ "model.layers.10.mlp.shared_experts.down_proj": {
119
+ "bits": 16,
120
+ "data_type": "float"
121
+ },
122
+ "model.layers.10.mlp.shared_experts.gate_proj": {
123
+ "bits": 16,
124
+ "data_type": "float"
125
+ },
126
+ "model.layers.10.mlp.shared_experts.up_proj": {
127
+ "bits": 16,
128
+ "data_type": "float"
129
+ },
130
+ "model.layers.11.mlp.shared_experts.down_proj": {
131
+ "bits": 16,
132
+ "data_type": "float"
133
+ },
134
+ "model.layers.11.mlp.shared_experts.gate_proj": {
135
+ "bits": 16,
136
+ "data_type": "float"
137
+ },
138
+ "model.layers.11.mlp.shared_experts.up_proj": {
139
+ "bits": 16,
140
+ "data_type": "float"
141
+ },
142
+ "model.layers.12.mlp.shared_experts.down_proj": {
143
+ "bits": 16,
144
+ "data_type": "float"
145
+ },
146
+ "model.layers.12.mlp.shared_experts.gate_proj": {
147
+ "bits": 16,
148
+ "data_type": "float"
149
+ },
150
+ "model.layers.12.mlp.shared_experts.up_proj": {
151
+ "bits": 16,
152
+ "data_type": "float"
153
+ },
154
+ "model.layers.13.mlp.shared_experts.down_proj": {
155
+ "bits": 16,
156
+ "data_type": "float"
157
+ },
158
+ "model.layers.13.mlp.shared_experts.gate_proj": {
159
+ "bits": 16,
160
+ "data_type": "float"
161
+ },
162
+ "model.layers.13.mlp.shared_experts.up_proj": {
163
+ "bits": 16,
164
+ "data_type": "float"
165
+ },
166
+ "model.layers.14.mlp.shared_experts.down_proj": {
167
+ "bits": 16,
168
+ "data_type": "float"
169
+ },
170
+ "model.layers.14.mlp.shared_experts.gate_proj": {
171
+ "bits": 16,
172
+ "data_type": "float"
173
+ },
174
+ "model.layers.14.mlp.shared_experts.up_proj": {
175
+ "bits": 16,
176
+ "data_type": "float"
177
+ },
178
+ "model.layers.15.mlp.shared_experts.down_proj": {
179
+ "bits": 16,
180
+ "data_type": "float"
181
+ },
182
+ "model.layers.15.mlp.shared_experts.gate_proj": {
183
+ "bits": 16,
184
+ "data_type": "float"
185
+ },
186
+ "model.layers.15.mlp.shared_experts.up_proj": {
187
+ "bits": 16,
188
+ "data_type": "float"
189
+ },
190
+ "model.layers.16.mlp.shared_experts.down_proj": {
191
+ "bits": 16,
192
+ "data_type": "float"
193
+ },
194
+ "model.layers.16.mlp.shared_experts.gate_proj": {
195
+ "bits": 16,
196
+ "data_type": "float"
197
+ },
198
+ "model.layers.16.mlp.shared_experts.up_proj": {
199
+ "bits": 16,
200
+ "data_type": "float"
201
+ },
202
+ "model.layers.17.mlp.shared_experts.down_proj": {
203
+ "bits": 16,
204
+ "data_type": "float"
205
+ },
206
+ "model.layers.17.mlp.shared_experts.gate_proj": {
207
+ "bits": 16,
208
+ "data_type": "float"
209
+ },
210
+ "model.layers.17.mlp.shared_experts.up_proj": {
211
+ "bits": 16,
212
+ "data_type": "float"
213
+ },
214
+ "model.layers.18.mlp.shared_experts.down_proj": {
215
+ "bits": 16,
216
+ "data_type": "float"
217
+ },
218
+ "model.layers.18.mlp.shared_experts.gate_proj": {
219
+ "bits": 16,
220
+ "data_type": "float"
221
+ },
222
+ "model.layers.18.mlp.shared_experts.up_proj": {
223
+ "bits": 16,
224
+ "data_type": "float"
225
+ },
226
+ "model.layers.19.mlp.shared_experts.down_proj": {
227
+ "bits": 16,
228
+ "data_type": "float"
229
+ },
230
+ "model.layers.19.mlp.shared_experts.gate_proj": {
231
+ "bits": 16,
232
+ "data_type": "float"
233
+ },
234
+ "model.layers.19.mlp.shared_experts.up_proj": {
235
+ "bits": 16,
236
+ "data_type": "float"
237
+ },
238
+ "model.layers.2.mlp.shared_experts.down_proj": {
239
+ "bits": 16,
240
+ "data_type": "float"
241
+ },
242
+ "model.layers.2.mlp.shared_experts.gate_proj": {
243
+ "bits": 16,
244
+ "data_type": "float"
245
+ },
246
+ "model.layers.2.mlp.shared_experts.up_proj": {
247
+ "bits": 16,
248
+ "data_type": "float"
249
+ },
250
+ "model.layers.20.mlp.shared_experts.down_proj": {
251
+ "bits": 16,
252
+ "data_type": "float"
253
+ },
254
+ "model.layers.20.mlp.shared_experts.gate_proj": {
255
+ "bits": 16,
256
+ "data_type": "float"
257
+ },
258
+ "model.layers.20.mlp.shared_experts.up_proj": {
259
+ "bits": 16,
260
+ "data_type": "float"
261
+ },
262
+ "model.layers.21.mlp.shared_experts.down_proj": {
263
+ "bits": 16,
264
+ "data_type": "float"
265
+ },
266
+ "model.layers.21.mlp.shared_experts.gate_proj": {
267
+ "bits": 16,
268
+ "data_type": "float"
269
+ },
270
+ "model.layers.21.mlp.shared_experts.up_proj": {
271
+ "bits": 16,
272
+ "data_type": "float"
273
+ },
274
+ "model.layers.22.mlp.shared_experts.down_proj": {
275
+ "bits": 16,
276
+ "data_type": "float"
277
+ },
278
+ "model.layers.22.mlp.shared_experts.gate_proj": {
279
+ "bits": 16,
280
+ "data_type": "float"
281
+ },
282
+ "model.layers.22.mlp.shared_experts.up_proj": {
283
+ "bits": 16,
284
+ "data_type": "float"
285
+ },
286
+ "model.layers.23.mlp.shared_experts.down_proj": {
287
+ "bits": 16,
288
+ "data_type": "float"
289
+ },
290
+ "model.layers.23.mlp.shared_experts.gate_proj": {
291
+ "bits": 16,
292
+ "data_type": "float"
293
+ },
294
+ "model.layers.23.mlp.shared_experts.up_proj": {
295
+ "bits": 16,
296
+ "data_type": "float"
297
+ },
298
+ "model.layers.24.mlp.shared_experts.down_proj": {
299
+ "bits": 16,
300
+ "data_type": "float"
301
+ },
302
+ "model.layers.24.mlp.shared_experts.gate_proj": {
303
+ "bits": 16,
304
+ "data_type": "float"
305
+ },
306
+ "model.layers.24.mlp.shared_experts.up_proj": {
307
+ "bits": 16,
308
+ "data_type": "float"
309
+ },
310
+ "model.layers.25.mlp.shared_experts.down_proj": {
311
+ "bits": 16,
312
+ "data_type": "float"
313
+ },
314
+ "model.layers.25.mlp.shared_experts.gate_proj": {
315
+ "bits": 16,
316
+ "data_type": "float"
317
+ },
318
+ "model.layers.25.mlp.shared_experts.up_proj": {
319
+ "bits": 16,
320
+ "data_type": "float"
321
+ },
322
+ "model.layers.26.mlp.shared_experts.down_proj": {
323
+ "bits": 16,
324
+ "data_type": "float"
325
+ },
326
+ "model.layers.26.mlp.shared_experts.gate_proj": {
327
+ "bits": 16,
328
+ "data_type": "float"
329
+ },
330
+ "model.layers.26.mlp.shared_experts.up_proj": {
331
+ "bits": 16,
332
+ "data_type": "float"
333
+ },
334
+ "model.layers.27.mlp.shared_experts.down_proj": {
335
+ "bits": 16,
336
+ "data_type": "float"
337
+ },
338
+ "model.layers.27.mlp.shared_experts.gate_proj": {
339
+ "bits": 16,
340
+ "data_type": "float"
341
+ },
342
+ "model.layers.27.mlp.shared_experts.up_proj": {
343
+ "bits": 16,
344
+ "data_type": "float"
345
+ },
346
+ "model.layers.28.mlp.shared_experts.down_proj": {
347
+ "bits": 16,
348
+ "data_type": "float"
349
+ },
350
+ "model.layers.28.mlp.shared_experts.gate_proj": {
351
+ "bits": 16,
352
+ "data_type": "float"
353
+ },
354
+ "model.layers.28.mlp.shared_experts.up_proj": {
355
+ "bits": 16,
356
+ "data_type": "float"
357
+ },
358
+ "model.layers.29.mlp.shared_experts.down_proj": {
359
+ "bits": 16,
360
+ "data_type": "float"
361
+ },
362
+ "model.layers.29.mlp.shared_experts.gate_proj": {
363
+ "bits": 16,
364
+ "data_type": "float"
365
+ },
366
+ "model.layers.29.mlp.shared_experts.up_proj": {
367
+ "bits": 16,
368
+ "data_type": "float"
369
+ },
370
+ "model.layers.3.mlp.shared_experts.down_proj": {
371
+ "bits": 16,
372
+ "data_type": "float"
373
+ },
374
+ "model.layers.3.mlp.shared_experts.gate_proj": {
375
+ "bits": 16,
376
+ "data_type": "float"
377
+ },
378
+ "model.layers.3.mlp.shared_experts.up_proj": {
379
+ "bits": 16,
380
+ "data_type": "float"
381
+ },
382
+ "model.layers.30.mlp.shared_experts.down_proj": {
383
+ "bits": 16,
384
+ "data_type": "float"
385
+ },
386
+ "model.layers.30.mlp.shared_experts.gate_proj": {
387
+ "bits": 16,
388
+ "data_type": "float"
389
+ },
390
+ "model.layers.30.mlp.shared_experts.up_proj": {
391
+ "bits": 16,
392
+ "data_type": "float"
393
+ },
394
+ "model.layers.31.mlp.shared_experts.down_proj": {
395
+ "bits": 16,
396
+ "data_type": "float"
397
+ },
398
+ "model.layers.31.mlp.shared_experts.gate_proj": {
399
+ "bits": 16,
400
+ "data_type": "float"
401
+ },
402
+ "model.layers.31.mlp.shared_experts.up_proj": {
403
+ "bits": 16,
404
+ "data_type": "float"
405
+ },
406
+ "model.layers.32.mlp.shared_experts.down_proj": {
407
+ "bits": 16,
408
+ "data_type": "float"
409
+ },
410
+ "model.layers.32.mlp.shared_experts.gate_proj": {
411
+ "bits": 16,
412
+ "data_type": "float"
413
+ },
414
+ "model.layers.32.mlp.shared_experts.up_proj": {
415
+ "bits": 16,
416
+ "data_type": "float"
417
+ },
418
+ "model.layers.33.mlp.shared_experts.down_proj": {
419
+ "bits": 16,
420
+ "data_type": "float"
421
+ },
422
+ "model.layers.33.mlp.shared_experts.gate_proj": {
423
+ "bits": 16,
424
+ "data_type": "float"
425
+ },
426
+ "model.layers.33.mlp.shared_experts.up_proj": {
427
+ "bits": 16,
428
+ "data_type": "float"
429
+ },
430
+ "model.layers.34.mlp.shared_experts.down_proj": {
431
+ "bits": 16,
432
+ "data_type": "float"
433
+ },
434
+ "model.layers.34.mlp.shared_experts.gate_proj": {
435
+ "bits": 16,
436
+ "data_type": "float"
437
+ },
438
+ "model.layers.34.mlp.shared_experts.up_proj": {
439
+ "bits": 16,
440
+ "data_type": "float"
441
+ },
442
+ "model.layers.35.mlp.shared_experts.down_proj": {
443
+ "bits": 16,
444
+ "data_type": "float"
445
+ },
446
+ "model.layers.35.mlp.shared_experts.gate_proj": {
447
+ "bits": 16,
448
+ "data_type": "float"
449
+ },
450
+ "model.layers.35.mlp.shared_experts.up_proj": {
451
+ "bits": 16,
452
+ "data_type": "float"
453
+ },
454
+ "model.layers.36.mlp.shared_experts.down_proj": {
455
+ "bits": 16,
456
+ "data_type": "float"
457
+ },
458
+ "model.layers.36.mlp.shared_experts.gate_proj": {
459
+ "bits": 16,
460
+ "data_type": "float"
461
+ },
462
+ "model.layers.36.mlp.shared_experts.up_proj": {
463
+ "bits": 16,
464
+ "data_type": "float"
465
+ },
466
+ "model.layers.37.mlp.shared_experts.down_proj": {
467
+ "bits": 16,
468
+ "data_type": "float"
469
+ },
470
+ "model.layers.37.mlp.shared_experts.gate_proj": {
471
+ "bits": 16,
472
+ "data_type": "float"
473
+ },
474
+ "model.layers.37.mlp.shared_experts.up_proj": {
475
+ "bits": 16,
476
+ "data_type": "float"
477
+ },
478
+ "model.layers.38.mlp.shared_experts.down_proj": {
479
+ "bits": 16,
480
+ "data_type": "float"
481
+ },
482
+ "model.layers.38.mlp.shared_experts.gate_proj": {
483
+ "bits": 16,
484
+ "data_type": "float"
485
+ },
486
+ "model.layers.38.mlp.shared_experts.up_proj": {
487
+ "bits": 16,
488
+ "data_type": "float"
489
+ },
490
+ "model.layers.39.mlp.shared_experts.down_proj": {
491
+ "bits": 16,
492
+ "data_type": "float"
493
+ },
494
+ "model.layers.39.mlp.shared_experts.gate_proj": {
495
+ "bits": 16,
496
+ "data_type": "float"
497
+ },
498
+ "model.layers.39.mlp.shared_experts.up_proj": {
499
+ "bits": 16,
500
+ "data_type": "float"
501
+ },
502
+ "model.layers.4.mlp.shared_experts.down_proj": {
503
+ "bits": 16,
504
+ "data_type": "float"
505
+ },
506
+ "model.layers.4.mlp.shared_experts.gate_proj": {
507
+ "bits": 16,
508
+ "data_type": "float"
509
+ },
510
+ "model.layers.4.mlp.shared_experts.up_proj": {
511
+ "bits": 16,
512
+ "data_type": "float"
513
+ },
514
+ "model.layers.40.mlp.shared_experts.down_proj": {
515
+ "bits": 16,
516
+ "data_type": "float"
517
+ },
518
+ "model.layers.40.mlp.shared_experts.gate_proj": {
519
+ "bits": 16,
520
+ "data_type": "float"
521
+ },
522
+ "model.layers.40.mlp.shared_experts.up_proj": {
523
+ "bits": 16,
524
+ "data_type": "float"
525
+ },
526
+ "model.layers.41.mlp.shared_experts.down_proj": {
527
+ "bits": 16,
528
+ "data_type": "float"
529
+ },
530
+ "model.layers.41.mlp.shared_experts.gate_proj": {
531
+ "bits": 16,
532
+ "data_type": "float"
533
+ },
534
+ "model.layers.41.mlp.shared_experts.up_proj": {
535
+ "bits": 16,
536
+ "data_type": "float"
537
+ },
538
+ "model.layers.42.mlp.shared_experts.down_proj": {
539
+ "bits": 16,
540
+ "data_type": "float"
541
+ },
542
+ "model.layers.42.mlp.shared_experts.gate_proj": {
543
+ "bits": 16,
544
+ "data_type": "float"
545
+ },
546
+ "model.layers.42.mlp.shared_experts.up_proj": {
547
+ "bits": 16,
548
+ "data_type": "float"
549
+ },
550
+ "model.layers.43.mlp.shared_experts.down_proj": {
551
+ "bits": 16,
552
+ "data_type": "float"
553
+ },
554
+ "model.layers.43.mlp.shared_experts.gate_proj": {
555
+ "bits": 16,
556
+ "data_type": "float"
557
+ },
558
+ "model.layers.43.mlp.shared_experts.up_proj": {
559
+ "bits": 16,
560
+ "data_type": "float"
561
+ },
562
+ "model.layers.44.mlp.shared_experts.down_proj": {
563
+ "bits": 16,
564
+ "data_type": "float"
565
+ },
566
+ "model.layers.44.mlp.shared_experts.gate_proj": {
567
+ "bits": 16,
568
+ "data_type": "float"
569
+ },
570
+ "model.layers.44.mlp.shared_experts.up_proj": {
571
+ "bits": 16,
572
+ "data_type": "float"
573
+ },
574
+ "model.layers.45.mlp.shared_experts.down_proj": {
575
+ "bits": 16,
576
+ "data_type": "float"
577
+ },
578
+ "model.layers.45.mlp.shared_experts.gate_proj": {
579
+ "bits": 16,
580
+ "data_type": "float"
581
+ },
582
+ "model.layers.45.mlp.shared_experts.up_proj": {
583
+ "bits": 16,
584
+ "data_type": "float"
585
+ },
586
+ "model.layers.46.mlp.shared_experts.down_proj": {
587
+ "bits": 16,
588
+ "data_type": "float"
589
+ },
590
+ "model.layers.46.mlp.shared_experts.gate_proj": {
591
+ "bits": 16,
592
+ "data_type": "float"
593
+ },
594
+ "model.layers.46.mlp.shared_experts.up_proj": {
595
+ "bits": 16,
596
+ "data_type": "float"
597
+ },
598
+ "model.layers.5.mlp.shared_experts.down_proj": {
599
+ "bits": 16,
600
+ "data_type": "float"
601
+ },
602
+ "model.layers.5.mlp.shared_experts.gate_proj": {
603
+ "bits": 16,
604
+ "data_type": "float"
605
+ },
606
+ "model.layers.5.mlp.shared_experts.up_proj": {
607
+ "bits": 16,
608
+ "data_type": "float"
609
+ },
610
+ "model.layers.6.mlp.shared_experts.down_proj": {
611
+ "bits": 16,
612
+ "data_type": "float"
613
+ },
614
+ "model.layers.6.mlp.shared_experts.gate_proj": {
615
+ "bits": 16,
616
+ "data_type": "float"
617
+ },
618
+ "model.layers.6.mlp.shared_experts.up_proj": {
619
+ "bits": 16,
620
+ "data_type": "float"
621
+ },
622
+ "model.layers.7.mlp.shared_experts.down_proj": {
623
+ "bits": 16,
624
+ "data_type": "float"
625
+ },
626
+ "model.layers.7.mlp.shared_experts.gate_proj": {
627
+ "bits": 16,
628
+ "data_type": "float"
629
+ },
630
+ "model.layers.7.mlp.shared_experts.up_proj": {
631
+ "bits": 16,
632
+ "data_type": "float"
633
+ },
634
+ "model.layers.8.mlp.shared_experts.down_proj": {
635
+ "bits": 16,
636
+ "data_type": "float"
637
+ },
638
+ "model.layers.8.mlp.shared_experts.gate_proj": {
639
+ "bits": 16,
640
+ "data_type": "float"
641
+ },
642
+ "model.layers.8.mlp.shared_experts.up_proj": {
643
+ "bits": 16,
644
+ "data_type": "float"
645
+ },
646
+ "model.layers.9.mlp.shared_experts.down_proj": {
647
+ "bits": 16,
648
+ "data_type": "float"
649
+ },
650
+ "model.layers.9.mlp.shared_experts.gate_proj": {
651
+ "bits": 16,
652
+ "data_type": "float"
653
+ },
654
+ "model.layers.9.mlp.shared_experts.up_proj": {
655
+ "bits": 16,
656
+ "data_type": "float"
657
+ }
658
+ },
659
+ "group_size": 128,
660
+ "packing_format": "auto_round:auto_gptq",
661
+ "quant_method": "auto-round",
662
+ "sym": true
663
+ },
664
+ "rms_norm_eps": 1e-05,
665
+ "rope_interleave": true,
666
+ "rope_parameters": {
667
+ "partial_rotary_factor": 1.0,
668
+ "rope_theta": 1000000,
669
+ "rope_type": "default"
670
+ },
671
+ "routed_scaling_factor": 1.8,
672
+ "tie_word_embeddings": false,
673
+ "topk_group": 1,
674
+ "topk_method": "noaux_tc",
675
+ "transformers_version": "5.0.0.dev0",
676
+ "use_cache": true,
677
+ "v_head_dim": 256,
678
+ "vocab_size": 154880
679
+ }
generation_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "eos_token_id": [
4
+ 154820,
5
+ 154827,
6
+ 154829
7
+ ],
8
+ "pad_token_id": 154820,
9
+ "temperature": 1.0,
10
+ "transformers_version": "5.0.0.dev0"
11
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73c7534263f28b130906d338644d19ebfa0e0a84343fa942cc98e6dfbc34327a
3
+ size 4999421760
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d628847b28900945a268baa714fcd561121e12cf466ffd16fd140894e3f9ccb
3
+ size 4996667608
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3a02c87f423a77636c18e11d7c7ad87df52edc4a23eaae5d38fa8a2ad515b26
3
+ size 4999795608
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a96d8f889981530b14b730040f153300b96ba7597d620ec0aa3dacb75645ae96
3
+ size 2248343480
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
quantization_config.json ADDED
@@ -0,0 +1,575 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "data_type": "int",
4
+ "group_size": 128,
5
+ "sym": true,
6
+ "autoround_version": "0.9.6",
7
+ "quant_method": "auto-round",
8
+ "packing_format": "auto_round:auto_gptq",
9
+ "extra_config": {
10
+ "model.layers.1.mlp.shared_experts.gate_proj": {
11
+ "bits": 16,
12
+ "data_type": "float"
13
+ },
14
+ "model.layers.1.mlp.shared_experts.up_proj": {
15
+ "bits": 16,
16
+ "data_type": "float"
17
+ },
18
+ "model.layers.1.mlp.shared_experts.down_proj": {
19
+ "bits": 16,
20
+ "data_type": "float"
21
+ },
22
+ "model.layers.2.mlp.shared_experts.gate_proj": {
23
+ "bits": 16,
24
+ "data_type": "float"
25
+ },
26
+ "model.layers.2.mlp.shared_experts.up_proj": {
27
+ "bits": 16,
28
+ "data_type": "float"
29
+ },
30
+ "model.layers.2.mlp.shared_experts.down_proj": {
31
+ "bits": 16,
32
+ "data_type": "float"
33
+ },
34
+ "model.layers.3.mlp.shared_experts.gate_proj": {
35
+ "bits": 16,
36
+ "data_type": "float"
37
+ },
38
+ "model.layers.3.mlp.shared_experts.up_proj": {
39
+ "bits": 16,
40
+ "data_type": "float"
41
+ },
42
+ "model.layers.3.mlp.shared_experts.down_proj": {
43
+ "bits": 16,
44
+ "data_type": "float"
45
+ },
46
+ "model.layers.4.mlp.shared_experts.gate_proj": {
47
+ "bits": 16,
48
+ "data_type": "float"
49
+ },
50
+ "model.layers.4.mlp.shared_experts.up_proj": {
51
+ "bits": 16,
52
+ "data_type": "float"
53
+ },
54
+ "model.layers.4.mlp.shared_experts.down_proj": {
55
+ "bits": 16,
56
+ "data_type": "float"
57
+ },
58
+ "model.layers.5.mlp.shared_experts.gate_proj": {
59
+ "bits": 16,
60
+ "data_type": "float"
61
+ },
62
+ "model.layers.5.mlp.shared_experts.up_proj": {
63
+ "bits": 16,
64
+ "data_type": "float"
65
+ },
66
+ "model.layers.5.mlp.shared_experts.down_proj": {
67
+ "bits": 16,
68
+ "data_type": "float"
69
+ },
70
+ "model.layers.6.mlp.shared_experts.gate_proj": {
71
+ "bits": 16,
72
+ "data_type": "float"
73
+ },
74
+ "model.layers.6.mlp.shared_experts.up_proj": {
75
+ "bits": 16,
76
+ "data_type": "float"
77
+ },
78
+ "model.layers.6.mlp.shared_experts.down_proj": {
79
+ "bits": 16,
80
+ "data_type": "float"
81
+ },
82
+ "model.layers.7.mlp.shared_experts.gate_proj": {
83
+ "bits": 16,
84
+ "data_type": "float"
85
+ },
86
+ "model.layers.7.mlp.shared_experts.up_proj": {
87
+ "bits": 16,
88
+ "data_type": "float"
89
+ },
90
+ "model.layers.7.mlp.shared_experts.down_proj": {
91
+ "bits": 16,
92
+ "data_type": "float"
93
+ },
94
+ "model.layers.8.mlp.shared_experts.gate_proj": {
95
+ "bits": 16,
96
+ "data_type": "float"
97
+ },
98
+ "model.layers.8.mlp.shared_experts.up_proj": {
99
+ "bits": 16,
100
+ "data_type": "float"
101
+ },
102
+ "model.layers.8.mlp.shared_experts.down_proj": {
103
+ "bits": 16,
104
+ "data_type": "float"
105
+ },
106
+ "model.layers.9.mlp.shared_experts.gate_proj": {
107
+ "bits": 16,
108
+ "data_type": "float"
109
+ },
110
+ "model.layers.9.mlp.shared_experts.up_proj": {
111
+ "bits": 16,
112
+ "data_type": "float"
113
+ },
114
+ "model.layers.9.mlp.shared_experts.down_proj": {
115
+ "bits": 16,
116
+ "data_type": "float"
117
+ },
118
+ "model.layers.10.mlp.shared_experts.gate_proj": {
119
+ "bits": 16,
120
+ "data_type": "float"
121
+ },
122
+ "model.layers.10.mlp.shared_experts.up_proj": {
123
+ "bits": 16,
124
+ "data_type": "float"
125
+ },
126
+ "model.layers.10.mlp.shared_experts.down_proj": {
127
+ "bits": 16,
128
+ "data_type": "float"
129
+ },
130
+ "model.layers.11.mlp.shared_experts.gate_proj": {
131
+ "bits": 16,
132
+ "data_type": "float"
133
+ },
134
+ "model.layers.11.mlp.shared_experts.up_proj": {
135
+ "bits": 16,
136
+ "data_type": "float"
137
+ },
138
+ "model.layers.11.mlp.shared_experts.down_proj": {
139
+ "bits": 16,
140
+ "data_type": "float"
141
+ },
142
+ "model.layers.12.mlp.shared_experts.gate_proj": {
143
+ "bits": 16,
144
+ "data_type": "float"
145
+ },
146
+ "model.layers.12.mlp.shared_experts.up_proj": {
147
+ "bits": 16,
148
+ "data_type": "float"
149
+ },
150
+ "model.layers.12.mlp.shared_experts.down_proj": {
151
+ "bits": 16,
152
+ "data_type": "float"
153
+ },
154
+ "model.layers.13.mlp.shared_experts.gate_proj": {
155
+ "bits": 16,
156
+ "data_type": "float"
157
+ },
158
+ "model.layers.13.mlp.shared_experts.up_proj": {
159
+ "bits": 16,
160
+ "data_type": "float"
161
+ },
162
+ "model.layers.13.mlp.shared_experts.down_proj": {
163
+ "bits": 16,
164
+ "data_type": "float"
165
+ },
166
+ "model.layers.14.mlp.shared_experts.gate_proj": {
167
+ "bits": 16,
168
+ "data_type": "float"
169
+ },
170
+ "model.layers.14.mlp.shared_experts.up_proj": {
171
+ "bits": 16,
172
+ "data_type": "float"
173
+ },
174
+ "model.layers.14.mlp.shared_experts.down_proj": {
175
+ "bits": 16,
176
+ "data_type": "float"
177
+ },
178
+ "model.layers.15.mlp.shared_experts.gate_proj": {
179
+ "bits": 16,
180
+ "data_type": "float"
181
+ },
182
+ "model.layers.15.mlp.shared_experts.up_proj": {
183
+ "bits": 16,
184
+ "data_type": "float"
185
+ },
186
+ "model.layers.15.mlp.shared_experts.down_proj": {
187
+ "bits": 16,
188
+ "data_type": "float"
189
+ },
190
+ "model.layers.16.mlp.shared_experts.gate_proj": {
191
+ "bits": 16,
192
+ "data_type": "float"
193
+ },
194
+ "model.layers.16.mlp.shared_experts.up_proj": {
195
+ "bits": 16,
196
+ "data_type": "float"
197
+ },
198
+ "model.layers.16.mlp.shared_experts.down_proj": {
199
+ "bits": 16,
200
+ "data_type": "float"
201
+ },
202
+ "model.layers.17.mlp.shared_experts.gate_proj": {
203
+ "bits": 16,
204
+ "data_type": "float"
205
+ },
206
+ "model.layers.17.mlp.shared_experts.up_proj": {
207
+ "bits": 16,
208
+ "data_type": "float"
209
+ },
210
+ "model.layers.17.mlp.shared_experts.down_proj": {
211
+ "bits": 16,
212
+ "data_type": "float"
213
+ },
214
+ "model.layers.18.mlp.shared_experts.gate_proj": {
215
+ "bits": 16,
216
+ "data_type": "float"
217
+ },
218
+ "model.layers.18.mlp.shared_experts.up_proj": {
219
+ "bits": 16,
220
+ "data_type": "float"
221
+ },
222
+ "model.layers.18.mlp.shared_experts.down_proj": {
223
+ "bits": 16,
224
+ "data_type": "float"
225
+ },
226
+ "model.layers.19.mlp.shared_experts.gate_proj": {
227
+ "bits": 16,
228
+ "data_type": "float"
229
+ },
230
+ "model.layers.19.mlp.shared_experts.up_proj": {
231
+ "bits": 16,
232
+ "data_type": "float"
233
+ },
234
+ "model.layers.19.mlp.shared_experts.down_proj": {
235
+ "bits": 16,
236
+ "data_type": "float"
237
+ },
238
+ "model.layers.20.mlp.shared_experts.gate_proj": {
239
+ "bits": 16,
240
+ "data_type": "float"
241
+ },
242
+ "model.layers.20.mlp.shared_experts.up_proj": {
243
+ "bits": 16,
244
+ "data_type": "float"
245
+ },
246
+ "model.layers.20.mlp.shared_experts.down_proj": {
247
+ "bits": 16,
248
+ "data_type": "float"
249
+ },
250
+ "model.layers.21.mlp.shared_experts.gate_proj": {
251
+ "bits": 16,
252
+ "data_type": "float"
253
+ },
254
+ "model.layers.21.mlp.shared_experts.up_proj": {
255
+ "bits": 16,
256
+ "data_type": "float"
257
+ },
258
+ "model.layers.21.mlp.shared_experts.down_proj": {
259
+ "bits": 16,
260
+ "data_type": "float"
261
+ },
262
+ "model.layers.22.mlp.shared_experts.gate_proj": {
263
+ "bits": 16,
264
+ "data_type": "float"
265
+ },
266
+ "model.layers.22.mlp.shared_experts.up_proj": {
267
+ "bits": 16,
268
+ "data_type": "float"
269
+ },
270
+ "model.layers.22.mlp.shared_experts.down_proj": {
271
+ "bits": 16,
272
+ "data_type": "float"
273
+ },
274
+ "model.layers.23.mlp.shared_experts.gate_proj": {
275
+ "bits": 16,
276
+ "data_type": "float"
277
+ },
278
+ "model.layers.23.mlp.shared_experts.up_proj": {
279
+ "bits": 16,
280
+ "data_type": "float"
281
+ },
282
+ "model.layers.23.mlp.shared_experts.down_proj": {
283
+ "bits": 16,
284
+ "data_type": "float"
285
+ },
286
+ "model.layers.24.mlp.shared_experts.gate_proj": {
287
+ "bits": 16,
288
+ "data_type": "float"
289
+ },
290
+ "model.layers.24.mlp.shared_experts.up_proj": {
291
+ "bits": 16,
292
+ "data_type": "float"
293
+ },
294
+ "model.layers.24.mlp.shared_experts.down_proj": {
295
+ "bits": 16,
296
+ "data_type": "float"
297
+ },
298
+ "model.layers.25.mlp.shared_experts.gate_proj": {
299
+ "bits": 16,
300
+ "data_type": "float"
301
+ },
302
+ "model.layers.25.mlp.shared_experts.up_proj": {
303
+ "bits": 16,
304
+ "data_type": "float"
305
+ },
306
+ "model.layers.25.mlp.shared_experts.down_proj": {
307
+ "bits": 16,
308
+ "data_type": "float"
309
+ },
310
+ "model.layers.26.mlp.shared_experts.gate_proj": {
311
+ "bits": 16,
312
+ "data_type": "float"
313
+ },
314
+ "model.layers.26.mlp.shared_experts.up_proj": {
315
+ "bits": 16,
316
+ "data_type": "float"
317
+ },
318
+ "model.layers.26.mlp.shared_experts.down_proj": {
319
+ "bits": 16,
320
+ "data_type": "float"
321
+ },
322
+ "model.layers.27.mlp.shared_experts.gate_proj": {
323
+ "bits": 16,
324
+ "data_type": "float"
325
+ },
326
+ "model.layers.27.mlp.shared_experts.up_proj": {
327
+ "bits": 16,
328
+ "data_type": "float"
329
+ },
330
+ "model.layers.27.mlp.shared_experts.down_proj": {
331
+ "bits": 16,
332
+ "data_type": "float"
333
+ },
334
+ "model.layers.28.mlp.shared_experts.gate_proj": {
335
+ "bits": 16,
336
+ "data_type": "float"
337
+ },
338
+ "model.layers.28.mlp.shared_experts.up_proj": {
339
+ "bits": 16,
340
+ "data_type": "float"
341
+ },
342
+ "model.layers.28.mlp.shared_experts.down_proj": {
343
+ "bits": 16,
344
+ "data_type": "float"
345
+ },
346
+ "model.layers.29.mlp.shared_experts.gate_proj": {
347
+ "bits": 16,
348
+ "data_type": "float"
349
+ },
350
+ "model.layers.29.mlp.shared_experts.up_proj": {
351
+ "bits": 16,
352
+ "data_type": "float"
353
+ },
354
+ "model.layers.29.mlp.shared_experts.down_proj": {
355
+ "bits": 16,
356
+ "data_type": "float"
357
+ },
358
+ "model.layers.30.mlp.shared_experts.gate_proj": {
359
+ "bits": 16,
360
+ "data_type": "float"
361
+ },
362
+ "model.layers.30.mlp.shared_experts.up_proj": {
363
+ "bits": 16,
364
+ "data_type": "float"
365
+ },
366
+ "model.layers.30.mlp.shared_experts.down_proj": {
367
+ "bits": 16,
368
+ "data_type": "float"
369
+ },
370
+ "model.layers.31.mlp.shared_experts.gate_proj": {
371
+ "bits": 16,
372
+ "data_type": "float"
373
+ },
374
+ "model.layers.31.mlp.shared_experts.up_proj": {
375
+ "bits": 16,
376
+ "data_type": "float"
377
+ },
378
+ "model.layers.31.mlp.shared_experts.down_proj": {
379
+ "bits": 16,
380
+ "data_type": "float"
381
+ },
382
+ "model.layers.32.mlp.shared_experts.gate_proj": {
383
+ "bits": 16,
384
+ "data_type": "float"
385
+ },
386
+ "model.layers.32.mlp.shared_experts.up_proj": {
387
+ "bits": 16,
388
+ "data_type": "float"
389
+ },
390
+ "model.layers.32.mlp.shared_experts.down_proj": {
391
+ "bits": 16,
392
+ "data_type": "float"
393
+ },
394
+ "model.layers.33.mlp.shared_experts.gate_proj": {
395
+ "bits": 16,
396
+ "data_type": "float"
397
+ },
398
+ "model.layers.33.mlp.shared_experts.up_proj": {
399
+ "bits": 16,
400
+ "data_type": "float"
401
+ },
402
+ "model.layers.33.mlp.shared_experts.down_proj": {
403
+ "bits": 16,
404
+ "data_type": "float"
405
+ },
406
+ "model.layers.34.mlp.shared_experts.gate_proj": {
407
+ "bits": 16,
408
+ "data_type": "float"
409
+ },
410
+ "model.layers.34.mlp.shared_experts.up_proj": {
411
+ "bits": 16,
412
+ "data_type": "float"
413
+ },
414
+ "model.layers.34.mlp.shared_experts.down_proj": {
415
+ "bits": 16,
416
+ "data_type": "float"
417
+ },
418
+ "model.layers.35.mlp.shared_experts.gate_proj": {
419
+ "bits": 16,
420
+ "data_type": "float"
421
+ },
422
+ "model.layers.35.mlp.shared_experts.up_proj": {
423
+ "bits": 16,
424
+ "data_type": "float"
425
+ },
426
+ "model.layers.35.mlp.shared_experts.down_proj": {
427
+ "bits": 16,
428
+ "data_type": "float"
429
+ },
430
+ "model.layers.36.mlp.shared_experts.gate_proj": {
431
+ "bits": 16,
432
+ "data_type": "float"
433
+ },
434
+ "model.layers.36.mlp.shared_experts.up_proj": {
435
+ "bits": 16,
436
+ "data_type": "float"
437
+ },
438
+ "model.layers.36.mlp.shared_experts.down_proj": {
439
+ "bits": 16,
440
+ "data_type": "float"
441
+ },
442
+ "model.layers.37.mlp.shared_experts.gate_proj": {
443
+ "bits": 16,
444
+ "data_type": "float"
445
+ },
446
+ "model.layers.37.mlp.shared_experts.up_proj": {
447
+ "bits": 16,
448
+ "data_type": "float"
449
+ },
450
+ "model.layers.37.mlp.shared_experts.down_proj": {
451
+ "bits": 16,
452
+ "data_type": "float"
453
+ },
454
+ "model.layers.38.mlp.shared_experts.gate_proj": {
455
+ "bits": 16,
456
+ "data_type": "float"
457
+ },
458
+ "model.layers.38.mlp.shared_experts.up_proj": {
459
+ "bits": 16,
460
+ "data_type": "float"
461
+ },
462
+ "model.layers.38.mlp.shared_experts.down_proj": {
463
+ "bits": 16,
464
+ "data_type": "float"
465
+ },
466
+ "model.layers.39.mlp.shared_experts.gate_proj": {
467
+ "bits": 16,
468
+ "data_type": "float"
469
+ },
470
+ "model.layers.39.mlp.shared_experts.up_proj": {
471
+ "bits": 16,
472
+ "data_type": "float"
473
+ },
474
+ "model.layers.39.mlp.shared_experts.down_proj": {
475
+ "bits": 16,
476
+ "data_type": "float"
477
+ },
478
+ "model.layers.40.mlp.shared_experts.gate_proj": {
479
+ "bits": 16,
480
+ "data_type": "float"
481
+ },
482
+ "model.layers.40.mlp.shared_experts.up_proj": {
483
+ "bits": 16,
484
+ "data_type": "float"
485
+ },
486
+ "model.layers.40.mlp.shared_experts.down_proj": {
487
+ "bits": 16,
488
+ "data_type": "float"
489
+ },
490
+ "model.layers.41.mlp.shared_experts.gate_proj": {
491
+ "bits": 16,
492
+ "data_type": "float"
493
+ },
494
+ "model.layers.41.mlp.shared_experts.up_proj": {
495
+ "bits": 16,
496
+ "data_type": "float"
497
+ },
498
+ "model.layers.41.mlp.shared_experts.down_proj": {
499
+ "bits": 16,
500
+ "data_type": "float"
501
+ },
502
+ "model.layers.42.mlp.shared_experts.gate_proj": {
503
+ "bits": 16,
504
+ "data_type": "float"
505
+ },
506
+ "model.layers.42.mlp.shared_experts.up_proj": {
507
+ "bits": 16,
508
+ "data_type": "float"
509
+ },
510
+ "model.layers.42.mlp.shared_experts.down_proj": {
511
+ "bits": 16,
512
+ "data_type": "float"
513
+ },
514
+ "model.layers.43.mlp.shared_experts.gate_proj": {
515
+ "bits": 16,
516
+ "data_type": "float"
517
+ },
518
+ "model.layers.43.mlp.shared_experts.up_proj": {
519
+ "bits": 16,
520
+ "data_type": "float"
521
+ },
522
+ "model.layers.43.mlp.shared_experts.down_proj": {
523
+ "bits": 16,
524
+ "data_type": "float"
525
+ },
526
+ "model.layers.44.mlp.shared_experts.gate_proj": {
527
+ "bits": 16,
528
+ "data_type": "float"
529
+ },
530
+ "model.layers.44.mlp.shared_experts.up_proj": {
531
+ "bits": 16,
532
+ "data_type": "float"
533
+ },
534
+ "model.layers.44.mlp.shared_experts.down_proj": {
535
+ "bits": 16,
536
+ "data_type": "float"
537
+ },
538
+ "model.layers.45.mlp.shared_experts.gate_proj": {
539
+ "bits": 16,
540
+ "data_type": "float"
541
+ },
542
+ "model.layers.45.mlp.shared_experts.up_proj": {
543
+ "bits": 16,
544
+ "data_type": "float"
545
+ },
546
+ "model.layers.45.mlp.shared_experts.down_proj": {
547
+ "bits": 16,
548
+ "data_type": "float"
549
+ },
550
+ "model.layers.46.mlp.shared_experts.gate_proj": {
551
+ "bits": 16,
552
+ "data_type": "float"
553
+ },
554
+ "model.layers.46.mlp.shared_experts.up_proj": {
555
+ "bits": 16,
556
+ "data_type": "float"
557
+ },
558
+ "model.layers.46.mlp.shared_experts.down_proj": {
559
+ "bits": 16,
560
+ "data_type": "float"
561
+ },
562
+ "model.layers.0.mlp.gate_proj": {
563
+ "bits": 16,
564
+ "data_type": "float"
565
+ },
566
+ "model.layers.0.mlp.up_proj": {
567
+ "bits": 16,
568
+ "data_type": "float"
569
+ },
570
+ "model.layers.0.mlp.down_proj": {
571
+ "bits": 16,
572
+ "data_type": "float"
573
+ }
574
+ }
575
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cfe2c099a7702a0921abc315ee039deb51e4a34b4818fc509bd27fa3dc4acc1
3
+ size 20217541
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "backend": "tokenizers",
3
+ "clean_up_tokenization_spaces": false,
4
+ "do_lower_case": false,
5
+ "eos_token": "<|endoftext|>",
6
+ "extra_special_tokens": [
7
+ "<|endoftext|>",
8
+ "[MASK]",
9
+ "[gMASK]",
10
+ "[sMASK]",
11
+ "<sop>",
12
+ "<eop>",
13
+ "<|system|>",
14
+ "<|user|>",
15
+ "<|assistant|>",
16
+ "<|observation|>",
17
+ "<|begin_of_image|>",
18
+ "<|end_of_image|>",
19
+ "<|begin_of_video|>",
20
+ "<|end_of_video|>",
21
+ "<|begin_of_audio|>",
22
+ "<|end_of_audio|>",
23
+ "<|begin_of_transcription|>",
24
+ "<|end_of_transcription|>"
25
+ ],
26
+ "is_local": true,
27
+ "model_max_length": 128000,
28
+ "model_specific_special_tokens": {},
29
+ "pad_token": "<|endoftext|>",
30
+ "padding_side": "left",
31
+ "remove_space": false,
32
+ "tokenizer_class": "TokenizersBackend"
33
+ }