TobDeBer commited on
Commit
b397f93
·
verified ·
1 Parent(s): bb5d438

Upload GLM-4.5-Air-UD-Q2_K_XL.smartquant.json with huggingface_hub

Browse files
GLM-4.5-Air-UD-Q2_K_XL.smartquant.json ADDED
@@ -0,0 +1,805 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output.weight": 14,
3
+ "output_norm.weight": 0,
4
+ "token_embd.weight": 12,
5
+ "blk.0.attn_k.bias": 0,
6
+ "blk.0.attn_k.weight": 12,
7
+ "blk.0.attn_norm.weight": 0,
8
+ "blk.0.attn_output.weight": 12,
9
+ "blk.0.attn_q.bias": 0,
10
+ "blk.0.attn_q.weight": 12,
11
+ "blk.0.attn_v.bias": 0,
12
+ "blk.0.attn_v.weight": 12,
13
+ "blk.0.ffn_down.weight": 20,
14
+ "blk.0.ffn_gate.weight": 12,
15
+ "blk.0.ffn_up.weight": 12,
16
+ "blk.0.post_attention_norm.weight": 0,
17
+ "blk.1.attn_k.bias": 0,
18
+ "blk.1.attn_k.weight": 13,
19
+ "blk.1.attn_norm.weight": 0,
20
+ "blk.1.attn_output.weight": 13,
21
+ "blk.1.attn_q.bias": 0,
22
+ "blk.1.attn_q.weight": 13,
23
+ "blk.1.attn_v.bias": 0,
24
+ "blk.1.attn_v.weight": 13,
25
+ "blk.1.exp_probs_b.bias": 0,
26
+ "blk.1.ffn_down_exps.weight": 20,
27
+ "blk.1.ffn_down_shexp.weight": 20,
28
+ "blk.1.ffn_gate_exps.weight": 10,
29
+ "blk.1.ffn_gate_inp.weight": 0,
30
+ "blk.1.ffn_gate_shexp.weight": 12,
31
+ "blk.1.ffn_up_exps.weight": 10,
32
+ "blk.1.ffn_up_shexp.weight": 12,
33
+ "blk.1.post_attention_norm.weight": 0,
34
+ "blk.2.attn_k.bias": 0,
35
+ "blk.2.attn_k.weight": 13,
36
+ "blk.2.attn_norm.weight": 0,
37
+ "blk.2.attn_output.weight": 13,
38
+ "blk.2.attn_q.bias": 0,
39
+ "blk.2.attn_q.weight": 13,
40
+ "blk.2.attn_v.bias": 0,
41
+ "blk.2.attn_v.weight": 13,
42
+ "blk.2.exp_probs_b.bias": 0,
43
+ "blk.2.ffn_down_exps.weight": 20,
44
+ "blk.2.ffn_down_shexp.weight": 20,
45
+ "blk.2.ffn_gate_exps.weight": 10,
46
+ "blk.2.ffn_gate_inp.weight": 0,
47
+ "blk.2.ffn_gate_shexp.weight": 12,
48
+ "blk.2.ffn_up_exps.weight": 10,
49
+ "blk.2.ffn_up_shexp.weight": 12,
50
+ "blk.2.post_attention_norm.weight": 0,
51
+ "blk.3.attn_k.bias": 0,
52
+ "blk.3.attn_k.weight": 12,
53
+ "blk.3.attn_norm.weight": 0,
54
+ "blk.3.attn_output.weight": 12,
55
+ "blk.3.attn_q.bias": 0,
56
+ "blk.3.attn_q.weight": 12,
57
+ "blk.3.attn_v.bias": 0,
58
+ "blk.3.attn_v.weight": 12,
59
+ "blk.3.exp_probs_b.bias": 0,
60
+ "blk.3.ffn_down_exps.weight": 20,
61
+ "blk.3.ffn_down_shexp.weight": 20,
62
+ "blk.3.ffn_gate_exps.weight": 10,
63
+ "blk.3.ffn_gate_inp.weight": 0,
64
+ "blk.3.ffn_gate_shexp.weight": 12,
65
+ "blk.3.ffn_up_exps.weight": 10,
66
+ "blk.3.ffn_up_shexp.weight": 12,
67
+ "blk.3.post_attention_norm.weight": 0,
68
+ "blk.4.attn_k.bias": 0,
69
+ "blk.4.attn_k.weight": 12,
70
+ "blk.4.attn_norm.weight": 0,
71
+ "blk.4.attn_output.weight": 12,
72
+ "blk.4.attn_q.bias": 0,
73
+ "blk.4.attn_q.weight": 12,
74
+ "blk.4.attn_v.bias": 0,
75
+ "blk.4.attn_v.weight": 12,
76
+ "blk.4.exp_probs_b.bias": 0,
77
+ "blk.4.ffn_down_exps.weight": 20,
78
+ "blk.4.ffn_down_shexp.weight": 20,
79
+ "blk.4.ffn_gate_exps.weight": 10,
80
+ "blk.4.ffn_gate_inp.weight": 0,
81
+ "blk.4.ffn_gate_shexp.weight": 12,
82
+ "blk.4.ffn_up_exps.weight": 10,
83
+ "blk.4.ffn_up_shexp.weight": 12,
84
+ "blk.4.post_attention_norm.weight": 0,
85
+ "blk.5.attn_k.bias": 0,
86
+ "blk.5.attn_k.weight": 12,
87
+ "blk.5.attn_norm.weight": 0,
88
+ "blk.5.attn_output.weight": 12,
89
+ "blk.5.attn_q.bias": 0,
90
+ "blk.5.attn_q.weight": 12,
91
+ "blk.5.attn_v.bias": 0,
92
+ "blk.5.attn_v.weight": 12,
93
+ "blk.5.exp_probs_b.bias": 0,
94
+ "blk.5.ffn_down_exps.weight": 20,
95
+ "blk.5.ffn_down_shexp.weight": 20,
96
+ "blk.5.ffn_gate_exps.weight": 10,
97
+ "blk.5.ffn_gate_inp.weight": 0,
98
+ "blk.5.ffn_gate_shexp.weight": 12,
99
+ "blk.5.ffn_up_exps.weight": 10,
100
+ "blk.5.ffn_up_shexp.weight": 12,
101
+ "blk.5.post_attention_norm.weight": 0,
102
+ "blk.6.attn_k.bias": 0,
103
+ "blk.6.attn_k.weight": 12,
104
+ "blk.6.attn_norm.weight": 0,
105
+ "blk.6.attn_output.weight": 12,
106
+ "blk.6.attn_q.bias": 0,
107
+ "blk.6.attn_q.weight": 12,
108
+ "blk.6.attn_v.bias": 0,
109
+ "blk.6.attn_v.weight": 12,
110
+ "blk.6.exp_probs_b.bias": 0,
111
+ "blk.6.ffn_down_exps.weight": 20,
112
+ "blk.6.ffn_down_shexp.weight": 20,
113
+ "blk.6.ffn_gate_exps.weight": 10,
114
+ "blk.6.ffn_gate_inp.weight": 0,
115
+ "blk.6.ffn_gate_shexp.weight": 12,
116
+ "blk.6.ffn_up_exps.weight": 10,
117
+ "blk.6.ffn_up_shexp.weight": 12,
118
+ "blk.6.post_attention_norm.weight": 0,
119
+ "blk.7.attn_k.bias": 0,
120
+ "blk.7.attn_k.weight": 12,
121
+ "blk.7.attn_norm.weight": 0,
122
+ "blk.7.attn_output.weight": 12,
123
+ "blk.7.attn_q.bias": 0,
124
+ "blk.7.attn_q.weight": 12,
125
+ "blk.7.attn_v.bias": 0,
126
+ "blk.7.attn_v.weight": 12,
127
+ "blk.7.exp_probs_b.bias": 0,
128
+ "blk.7.ffn_down_exps.weight": 20,
129
+ "blk.7.ffn_down_shexp.weight": 20,
130
+ "blk.7.ffn_gate_exps.weight": 10,
131
+ "blk.7.ffn_gate_inp.weight": 0,
132
+ "blk.7.ffn_gate_shexp.weight": 12,
133
+ "blk.7.ffn_up_exps.weight": 10,
134
+ "blk.7.ffn_up_shexp.weight": 12,
135
+ "blk.7.post_attention_norm.weight": 0,
136
+ "blk.8.attn_k.bias": 0,
137
+ "blk.8.attn_k.weight": 12,
138
+ "blk.8.attn_norm.weight": 0,
139
+ "blk.8.attn_output.weight": 12,
140
+ "blk.8.attn_q.bias": 0,
141
+ "blk.8.attn_q.weight": 12,
142
+ "blk.8.attn_v.bias": 0,
143
+ "blk.8.attn_v.weight": 12,
144
+ "blk.8.exp_probs_b.bias": 0,
145
+ "blk.8.ffn_down_exps.weight": 20,
146
+ "blk.8.ffn_down_shexp.weight": 20,
147
+ "blk.8.ffn_gate_exps.weight": 10,
148
+ "blk.8.ffn_gate_inp.weight": 0,
149
+ "blk.8.ffn_gate_shexp.weight": 12,
150
+ "blk.8.ffn_up_exps.weight": 10,
151
+ "blk.8.ffn_up_shexp.weight": 12,
152
+ "blk.8.post_attention_norm.weight": 0,
153
+ "blk.9.attn_k.bias": 0,
154
+ "blk.9.attn_k.weight": 12,
155
+ "blk.9.attn_norm.weight": 0,
156
+ "blk.9.attn_output.weight": 12,
157
+ "blk.9.attn_q.bias": 0,
158
+ "blk.9.attn_q.weight": 12,
159
+ "blk.9.attn_v.bias": 0,
160
+ "blk.9.attn_v.weight": 12,
161
+ "blk.9.exp_probs_b.bias": 0,
162
+ "blk.9.ffn_down_exps.weight": 20,
163
+ "blk.9.ffn_down_shexp.weight": 20,
164
+ "blk.9.ffn_gate_exps.weight": 10,
165
+ "blk.9.ffn_gate_inp.weight": 0,
166
+ "blk.9.ffn_gate_shexp.weight": 12,
167
+ "blk.9.ffn_up_exps.weight": 10,
168
+ "blk.9.ffn_up_shexp.weight": 12,
169
+ "blk.9.post_attention_norm.weight": 0,
170
+ "blk.10.attn_k.bias": 0,
171
+ "blk.10.attn_k.weight": 12,
172
+ "blk.10.attn_norm.weight": 0,
173
+ "blk.10.attn_output.weight": 12,
174
+ "blk.10.attn_q.bias": 0,
175
+ "blk.10.attn_q.weight": 12,
176
+ "blk.10.attn_v.bias": 0,
177
+ "blk.10.attn_v.weight": 12,
178
+ "blk.10.exp_probs_b.bias": 0,
179
+ "blk.10.ffn_down_exps.weight": 20,
180
+ "blk.10.ffn_down_shexp.weight": 20,
181
+ "blk.10.ffn_gate_exps.weight": 10,
182
+ "blk.10.ffn_gate_inp.weight": 0,
183
+ "blk.10.ffn_gate_shexp.weight": 12,
184
+ "blk.10.ffn_up_exps.weight": 10,
185
+ "blk.10.ffn_up_shexp.weight": 12,
186
+ "blk.10.post_attention_norm.weight": 0,
187
+ "blk.11.attn_k.bias": 0,
188
+ "blk.11.attn_k.weight": 12,
189
+ "blk.11.attn_norm.weight": 0,
190
+ "blk.11.attn_output.weight": 12,
191
+ "blk.11.attn_q.bias": 0,
192
+ "blk.11.attn_q.weight": 12,
193
+ "blk.11.attn_v.bias": 0,
194
+ "blk.11.attn_v.weight": 12,
195
+ "blk.11.exp_probs_b.bias": 0,
196
+ "blk.11.ffn_down_exps.weight": 20,
197
+ "blk.11.ffn_down_shexp.weight": 20,
198
+ "blk.11.ffn_gate_exps.weight": 10,
199
+ "blk.11.ffn_gate_inp.weight": 0,
200
+ "blk.11.ffn_gate_shexp.weight": 12,
201
+ "blk.11.ffn_up_exps.weight": 10,
202
+ "blk.11.ffn_up_shexp.weight": 12,
203
+ "blk.11.post_attention_norm.weight": 0,
204
+ "blk.12.attn_k.bias": 0,
205
+ "blk.12.attn_k.weight": 12,
206
+ "blk.12.attn_norm.weight": 0,
207
+ "blk.12.attn_output.weight": 12,
208
+ "blk.12.attn_q.bias": 0,
209
+ "blk.12.attn_q.weight": 12,
210
+ "blk.12.attn_v.bias": 0,
211
+ "blk.12.attn_v.weight": 12,
212
+ "blk.12.exp_probs_b.bias": 0,
213
+ "blk.12.ffn_down_exps.weight": 20,
214
+ "blk.12.ffn_down_shexp.weight": 20,
215
+ "blk.12.ffn_gate_exps.weight": 10,
216
+ "blk.12.ffn_gate_inp.weight": 0,
217
+ "blk.12.ffn_gate_shexp.weight": 12,
218
+ "blk.12.ffn_up_exps.weight": 10,
219
+ "blk.12.ffn_up_shexp.weight": 12,
220
+ "blk.12.post_attention_norm.weight": 0,
221
+ "blk.13.attn_k.bias": 0,
222
+ "blk.13.attn_k.weight": 12,
223
+ "blk.13.attn_norm.weight": 0,
224
+ "blk.13.attn_output.weight": 12,
225
+ "blk.13.attn_q.bias": 0,
226
+ "blk.13.attn_q.weight": 12,
227
+ "blk.13.attn_v.bias": 0,
228
+ "blk.13.attn_v.weight": 12,
229
+ "blk.13.exp_probs_b.bias": 0,
230
+ "blk.13.ffn_down_exps.weight": 20,
231
+ "blk.13.ffn_down_shexp.weight": 20,
232
+ "blk.13.ffn_gate_exps.weight": 10,
233
+ "blk.13.ffn_gate_inp.weight": 0,
234
+ "blk.13.ffn_gate_shexp.weight": 12,
235
+ "blk.13.ffn_up_exps.weight": 10,
236
+ "blk.13.ffn_up_shexp.weight": 12,
237
+ "blk.13.post_attention_norm.weight": 0,
238
+ "blk.14.attn_k.bias": 0,
239
+ "blk.14.attn_k.weight": 12,
240
+ "blk.14.attn_norm.weight": 0,
241
+ "blk.14.attn_output.weight": 12,
242
+ "blk.14.attn_q.bias": 0,
243
+ "blk.14.attn_q.weight": 12,
244
+ "blk.14.attn_v.bias": 0,
245
+ "blk.14.attn_v.weight": 12,
246
+ "blk.14.exp_probs_b.bias": 0,
247
+ "blk.14.ffn_down_exps.weight": 20,
248
+ "blk.14.ffn_down_shexp.weight": 20,
249
+ "blk.14.ffn_gate_exps.weight": 10,
250
+ "blk.14.ffn_gate_inp.weight": 0,
251
+ "blk.14.ffn_gate_shexp.weight": 12,
252
+ "blk.14.ffn_up_exps.weight": 10,
253
+ "blk.14.ffn_up_shexp.weight": 12,
254
+ "blk.14.post_attention_norm.weight": 0,
255
+ "blk.15.attn_k.bias": 0,
256
+ "blk.15.attn_k.weight": 12,
257
+ "blk.15.attn_norm.weight": 0,
258
+ "blk.15.attn_output.weight": 12,
259
+ "blk.15.attn_q.bias": 0,
260
+ "blk.15.attn_q.weight": 12,
261
+ "blk.15.attn_v.bias": 0,
262
+ "blk.15.attn_v.weight": 12,
263
+ "blk.15.exp_probs_b.bias": 0,
264
+ "blk.15.ffn_down_exps.weight": 20,
265
+ "blk.15.ffn_down_shexp.weight": 20,
266
+ "blk.15.ffn_gate_exps.weight": 10,
267
+ "blk.15.ffn_gate_inp.weight": 0,
268
+ "blk.15.ffn_gate_shexp.weight": 12,
269
+ "blk.15.ffn_up_exps.weight": 10,
270
+ "blk.15.ffn_up_shexp.weight": 12,
271
+ "blk.15.post_attention_norm.weight": 0,
272
+ "blk.16.attn_k.bias": 0,
273
+ "blk.16.attn_k.weight": 12,
274
+ "blk.16.attn_norm.weight": 0,
275
+ "blk.16.attn_output.weight": 12,
276
+ "blk.16.attn_q.bias": 0,
277
+ "blk.16.attn_q.weight": 12,
278
+ "blk.16.attn_v.bias": 0,
279
+ "blk.16.attn_v.weight": 12,
280
+ "blk.16.exp_probs_b.bias": 0,
281
+ "blk.16.ffn_down_exps.weight": 20,
282
+ "blk.16.ffn_down_shexp.weight": 20,
283
+ "blk.16.ffn_gate_exps.weight": 10,
284
+ "blk.16.ffn_gate_inp.weight": 0,
285
+ "blk.16.ffn_gate_shexp.weight": 12,
286
+ "blk.16.ffn_up_exps.weight": 10,
287
+ "blk.16.ffn_up_shexp.weight": 12,
288
+ "blk.16.post_attention_norm.weight": 0,
289
+ "blk.17.attn_k.bias": 0,
290
+ "blk.17.attn_k.weight": 12,
291
+ "blk.17.attn_norm.weight": 0,
292
+ "blk.17.attn_output.weight": 12,
293
+ "blk.17.attn_q.bias": 0,
294
+ "blk.17.attn_q.weight": 12,
295
+ "blk.17.attn_v.bias": 0,
296
+ "blk.17.attn_v.weight": 12,
297
+ "blk.17.exp_probs_b.bias": 0,
298
+ "blk.17.ffn_down_exps.weight": 20,
299
+ "blk.17.ffn_down_shexp.weight": 20,
300
+ "blk.17.ffn_gate_exps.weight": 10,
301
+ "blk.17.ffn_gate_inp.weight": 0,
302
+ "blk.17.ffn_gate_shexp.weight": 12,
303
+ "blk.17.ffn_up_exps.weight": 10,
304
+ "blk.17.ffn_up_shexp.weight": 12,
305
+ "blk.17.post_attention_norm.weight": 0,
306
+ "blk.18.attn_k.bias": 0,
307
+ "blk.18.attn_k.weight": 12,
308
+ "blk.18.attn_norm.weight": 0,
309
+ "blk.18.attn_output.weight": 12,
310
+ "blk.18.attn_q.bias": 0,
311
+ "blk.18.attn_q.weight": 12,
312
+ "blk.18.attn_v.bias": 0,
313
+ "blk.18.attn_v.weight": 12,
314
+ "blk.18.exp_probs_b.bias": 0,
315
+ "blk.18.ffn_down_exps.weight": 20,
316
+ "blk.18.ffn_down_shexp.weight": 20,
317
+ "blk.18.ffn_gate_exps.weight": 10,
318
+ "blk.18.ffn_gate_inp.weight": 0,
319
+ "blk.18.ffn_gate_shexp.weight": 12,
320
+ "blk.18.ffn_up_exps.weight": 10,
321
+ "blk.18.ffn_up_shexp.weight": 12,
322
+ "blk.18.post_attention_norm.weight": 0,
323
+ "blk.19.attn_k.bias": 0,
324
+ "blk.19.attn_k.weight": 12,
325
+ "blk.19.attn_norm.weight": 0,
326
+ "blk.19.attn_output.weight": 12,
327
+ "blk.19.attn_q.bias": 0,
328
+ "blk.19.attn_q.weight": 12,
329
+ "blk.19.attn_v.bias": 0,
330
+ "blk.19.attn_v.weight": 12,
331
+ "blk.19.exp_probs_b.bias": 0,
332
+ "blk.19.ffn_down_exps.weight": 20,
333
+ "blk.19.ffn_down_shexp.weight": 20,
334
+ "blk.19.ffn_gate_exps.weight": 10,
335
+ "blk.19.ffn_gate_inp.weight": 0,
336
+ "blk.19.ffn_gate_shexp.weight": 12,
337
+ "blk.19.ffn_up_exps.weight": 10,
338
+ "blk.19.ffn_up_shexp.weight": 12,
339
+ "blk.19.post_attention_norm.weight": 0,
340
+ "blk.20.attn_k.bias": 0,
341
+ "blk.20.attn_k.weight": 12,
342
+ "blk.20.attn_norm.weight": 0,
343
+ "blk.20.attn_output.weight": 12,
344
+ "blk.20.attn_q.bias": 0,
345
+ "blk.20.attn_q.weight": 12,
346
+ "blk.20.attn_v.bias": 0,
347
+ "blk.20.attn_v.weight": 12,
348
+ "blk.20.exp_probs_b.bias": 0,
349
+ "blk.20.ffn_down_exps.weight": 20,
350
+ "blk.20.ffn_down_shexp.weight": 20,
351
+ "blk.20.ffn_gate_exps.weight": 10,
352
+ "blk.20.ffn_gate_inp.weight": 0,
353
+ "blk.20.ffn_gate_shexp.weight": 12,
354
+ "blk.20.ffn_up_exps.weight": 10,
355
+ "blk.20.ffn_up_shexp.weight": 12,
356
+ "blk.20.post_attention_norm.weight": 0,
357
+ "blk.21.attn_k.bias": 0,
358
+ "blk.21.attn_k.weight": 12,
359
+ "blk.21.attn_norm.weight": 0,
360
+ "blk.21.attn_output.weight": 12,
361
+ "blk.21.attn_q.bias": 0,
362
+ "blk.21.attn_q.weight": 12,
363
+ "blk.21.attn_v.bias": 0,
364
+ "blk.21.attn_v.weight": 12,
365
+ "blk.21.exp_probs_b.bias": 0,
366
+ "blk.21.ffn_down_exps.weight": 20,
367
+ "blk.21.ffn_down_shexp.weight": 20,
368
+ "blk.21.ffn_gate_exps.weight": 10,
369
+ "blk.21.ffn_gate_inp.weight": 0,
370
+ "blk.21.ffn_gate_shexp.weight": 12,
371
+ "blk.21.ffn_up_exps.weight": 10,
372
+ "blk.21.ffn_up_shexp.weight": 12,
373
+ "blk.21.post_attention_norm.weight": 0,
374
+ "blk.22.attn_k.bias": 0,
375
+ "blk.22.attn_k.weight": 12,
376
+ "blk.22.attn_norm.weight": 0,
377
+ "blk.22.attn_output.weight": 12,
378
+ "blk.22.attn_q.bias": 0,
379
+ "blk.22.attn_q.weight": 12,
380
+ "blk.22.attn_v.bias": 0,
381
+ "blk.22.attn_v.weight": 12,
382
+ "blk.22.exp_probs_b.bias": 0,
383
+ "blk.22.ffn_down_exps.weight": 20,
384
+ "blk.22.ffn_down_shexp.weight": 20,
385
+ "blk.22.ffn_gate_exps.weight": 10,
386
+ "blk.22.ffn_gate_inp.weight": 0,
387
+ "blk.22.ffn_gate_shexp.weight": 12,
388
+ "blk.22.ffn_up_exps.weight": 10,
389
+ "blk.22.ffn_up_shexp.weight": 12,
390
+ "blk.22.post_attention_norm.weight": 0,
391
+ "blk.23.attn_k.bias": 0,
392
+ "blk.23.attn_k.weight": 12,
393
+ "blk.23.attn_norm.weight": 0,
394
+ "blk.23.attn_output.weight": 12,
395
+ "blk.23.attn_q.bias": 0,
396
+ "blk.23.attn_q.weight": 12,
397
+ "blk.23.attn_v.bias": 0,
398
+ "blk.23.attn_v.weight": 12,
399
+ "blk.23.exp_probs_b.bias": 0,
400
+ "blk.23.ffn_down_exps.weight": 20,
401
+ "blk.23.ffn_down_shexp.weight": 20,
402
+ "blk.23.ffn_gate_exps.weight": 10,
403
+ "blk.23.ffn_gate_inp.weight": 0,
404
+ "blk.23.ffn_gate_shexp.weight": 12,
405
+ "blk.23.ffn_up_exps.weight": 10,
406
+ "blk.23.ffn_up_shexp.weight": 12,
407
+ "blk.23.post_attention_norm.weight": 0,
408
+ "blk.24.attn_k.bias": 0,
409
+ "blk.24.attn_k.weight": 12,
410
+ "blk.24.attn_norm.weight": 0,
411
+ "blk.24.attn_output.weight": 14,
412
+ "blk.24.attn_q.bias": 0,
413
+ "blk.24.attn_q.weight": 12,
414
+ "blk.24.attn_v.bias": 0,
415
+ "blk.24.attn_v.weight": 12,
416
+ "blk.24.exp_probs_b.bias": 0,
417
+ "blk.24.ffn_down_exps.weight": 20,
418
+ "blk.24.ffn_down_shexp.weight": 20,
419
+ "blk.24.ffn_gate_exps.weight": 10,
420
+ "blk.24.ffn_gate_inp.weight": 0,
421
+ "blk.24.ffn_gate_shexp.weight": 12,
422
+ "blk.24.ffn_up_exps.weight": 10,
423
+ "blk.24.ffn_up_shexp.weight": 12,
424
+ "blk.24.post_attention_norm.weight": 0,
425
+ "blk.25.attn_k.bias": 0,
426
+ "blk.25.attn_k.weight": 12,
427
+ "blk.25.attn_norm.weight": 0,
428
+ "blk.25.attn_output.weight": 12,
429
+ "blk.25.attn_q.bias": 0,
430
+ "blk.25.attn_q.weight": 12,
431
+ "blk.25.attn_v.bias": 0,
432
+ "blk.25.attn_v.weight": 12,
433
+ "blk.25.exp_probs_b.bias": 0,
434
+ "blk.25.ffn_down_exps.weight": 20,
435
+ "blk.25.ffn_down_shexp.weight": 20,
436
+ "blk.25.ffn_gate_exps.weight": 10,
437
+ "blk.25.ffn_gate_inp.weight": 0,
438
+ "blk.25.ffn_gate_shexp.weight": 12,
439
+ "blk.25.ffn_up_exps.weight": 10,
440
+ "blk.25.ffn_up_shexp.weight": 12,
441
+ "blk.25.post_attention_norm.weight": 0,
442
+ "blk.26.attn_k.bias": 0,
443
+ "blk.26.attn_k.weight": 12,
444
+ "blk.26.attn_norm.weight": 0,
445
+ "blk.26.attn_output.weight": 12,
446
+ "blk.26.attn_q.bias": 0,
447
+ "blk.26.attn_q.weight": 12,
448
+ "blk.26.attn_v.bias": 0,
449
+ "blk.26.attn_v.weight": 12,
450
+ "blk.26.exp_probs_b.bias": 0,
451
+ "blk.26.ffn_down_exps.weight": 20,
452
+ "blk.26.ffn_down_shexp.weight": 20,
453
+ "blk.26.ffn_gate_exps.weight": 10,
454
+ "blk.26.ffn_gate_inp.weight": 0,
455
+ "blk.26.ffn_gate_shexp.weight": 12,
456
+ "blk.26.ffn_up_exps.weight": 10,
457
+ "blk.26.ffn_up_shexp.weight": 12,
458
+ "blk.26.post_attention_norm.weight": 0,
459
+ "blk.27.attn_k.bias": 0,
460
+ "blk.27.attn_k.weight": 12,
461
+ "blk.27.attn_norm.weight": 0,
462
+ "blk.27.attn_output.weight": 12,
463
+ "blk.27.attn_q.bias": 0,
464
+ "blk.27.attn_q.weight": 12,
465
+ "blk.27.attn_v.bias": 0,
466
+ "blk.27.attn_v.weight": 12,
467
+ "blk.27.exp_probs_b.bias": 0,
468
+ "blk.27.ffn_down_exps.weight": 20,
469
+ "blk.27.ffn_down_shexp.weight": 20,
470
+ "blk.27.ffn_gate_exps.weight": 10,
471
+ "blk.27.ffn_gate_inp.weight": 0,
472
+ "blk.27.ffn_gate_shexp.weight": 12,
473
+ "blk.27.ffn_up_exps.weight": 10,
474
+ "blk.27.ffn_up_shexp.weight": 12,
475
+ "blk.27.post_attention_norm.weight": 0,
476
+ "blk.28.attn_k.bias": 0,
477
+ "blk.28.attn_k.weight": 12,
478
+ "blk.28.attn_norm.weight": 0,
479
+ "blk.28.attn_output.weight": 14,
480
+ "blk.28.attn_q.bias": 0,
481
+ "blk.28.attn_q.weight": 12,
482
+ "blk.28.attn_v.bias": 0,
483
+ "blk.28.attn_v.weight": 12,
484
+ "blk.28.exp_probs_b.bias": 0,
485
+ "blk.28.ffn_down_exps.weight": 20,
486
+ "blk.28.ffn_down_shexp.weight": 20,
487
+ "blk.28.ffn_gate_exps.weight": 10,
488
+ "blk.28.ffn_gate_inp.weight": 0,
489
+ "blk.28.ffn_gate_shexp.weight": 12,
490
+ "blk.28.ffn_up_exps.weight": 10,
491
+ "blk.28.ffn_up_shexp.weight": 12,
492
+ "blk.28.post_attention_norm.weight": 0,
493
+ "blk.29.attn_k.bias": 0,
494
+ "blk.29.attn_k.weight": 12,
495
+ "blk.29.attn_norm.weight": 0,
496
+ "blk.29.attn_output.weight": 12,
497
+ "blk.29.attn_q.bias": 0,
498
+ "blk.29.attn_q.weight": 12,
499
+ "blk.29.attn_v.bias": 0,
500
+ "blk.29.attn_v.weight": 12,
501
+ "blk.29.exp_probs_b.bias": 0,
502
+ "blk.29.ffn_down_exps.weight": 20,
503
+ "blk.29.ffn_down_shexp.weight": 20,
504
+ "blk.29.ffn_gate_exps.weight": 10,
505
+ "blk.29.ffn_gate_inp.weight": 0,
506
+ "blk.29.ffn_gate_shexp.weight": 12,
507
+ "blk.29.ffn_up_exps.weight": 10,
508
+ "blk.29.ffn_up_shexp.weight": 12,
509
+ "blk.29.post_attention_norm.weight": 0,
510
+ "blk.30.attn_k.bias": 0,
511
+ "blk.30.attn_k.weight": 12,
512
+ "blk.30.attn_norm.weight": 0,
513
+ "blk.30.attn_output.weight": 14,
514
+ "blk.30.attn_q.bias": 0,
515
+ "blk.30.attn_q.weight": 12,
516
+ "blk.30.attn_v.bias": 0,
517
+ "blk.30.attn_v.weight": 12,
518
+ "blk.30.exp_probs_b.bias": 0,
519
+ "blk.30.ffn_down_exps.weight": 20,
520
+ "blk.30.ffn_down_shexp.weight": 20,
521
+ "blk.30.ffn_gate_exps.weight": 10,
522
+ "blk.30.ffn_gate_inp.weight": 0,
523
+ "blk.30.ffn_gate_shexp.weight": 12,
524
+ "blk.30.ffn_up_exps.weight": 10,
525
+ "blk.30.ffn_up_shexp.weight": 12,
526
+ "blk.30.post_attention_norm.weight": 0,
527
+ "blk.31.attn_k.bias": 0,
528
+ "blk.31.attn_k.weight": 12,
529
+ "blk.31.attn_norm.weight": 0,
530
+ "blk.31.attn_output.weight": 14,
531
+ "blk.31.attn_q.bias": 0,
532
+ "blk.31.attn_q.weight": 12,
533
+ "blk.31.attn_v.bias": 0,
534
+ "blk.31.attn_v.weight": 12,
535
+ "blk.31.exp_probs_b.bias": 0,
536
+ "blk.31.ffn_down_exps.weight": 20,
537
+ "blk.31.ffn_down_shexp.weight": 20,
538
+ "blk.31.ffn_gate_exps.weight": 10,
539
+ "blk.31.ffn_gate_inp.weight": 0,
540
+ "blk.31.ffn_gate_shexp.weight": 12,
541
+ "blk.31.ffn_up_exps.weight": 10,
542
+ "blk.31.ffn_up_shexp.weight": 12,
543
+ "blk.31.post_attention_norm.weight": 0,
544
+ "blk.32.attn_k.bias": 0,
545
+ "blk.32.attn_k.weight": 12,
546
+ "blk.32.attn_norm.weight": 0,
547
+ "blk.32.attn_output.weight": 12,
548
+ "blk.32.attn_q.bias": 0,
549
+ "blk.32.attn_q.weight": 12,
550
+ "blk.32.attn_v.bias": 0,
551
+ "blk.32.attn_v.weight": 12,
552
+ "blk.32.exp_probs_b.bias": 0,
553
+ "blk.32.ffn_down_exps.weight": 20,
554
+ "blk.32.ffn_down_shexp.weight": 20,
555
+ "blk.32.ffn_gate_exps.weight": 10,
556
+ "blk.32.ffn_gate_inp.weight": 0,
557
+ "blk.32.ffn_gate_shexp.weight": 12,
558
+ "blk.32.ffn_up_exps.weight": 10,
559
+ "blk.32.ffn_up_shexp.weight": 12,
560
+ "blk.32.post_attention_norm.weight": 0,
561
+ "blk.33.attn_k.bias": 0,
562
+ "blk.33.attn_k.weight": 12,
563
+ "blk.33.attn_norm.weight": 0,
564
+ "blk.33.attn_output.weight": 12,
565
+ "blk.33.attn_q.bias": 0,
566
+ "blk.33.attn_q.weight": 12,
567
+ "blk.33.attn_v.bias": 0,
568
+ "blk.33.attn_v.weight": 12,
569
+ "blk.33.exp_probs_b.bias": 0,
570
+ "blk.33.ffn_down_exps.weight": 20,
571
+ "blk.33.ffn_down_shexp.weight": 20,
572
+ "blk.33.ffn_gate_exps.weight": 10,
573
+ "blk.33.ffn_gate_inp.weight": 0,
574
+ "blk.33.ffn_gate_shexp.weight": 12,
575
+ "blk.33.ffn_up_exps.weight": 10,
576
+ "blk.33.ffn_up_shexp.weight": 12,
577
+ "blk.33.post_attention_norm.weight": 0,
578
+ "blk.34.attn_k.bias": 0,
579
+ "blk.34.attn_k.weight": 12,
580
+ "blk.34.attn_norm.weight": 0,
581
+ "blk.34.attn_output.weight": 14,
582
+ "blk.34.attn_q.bias": 0,
583
+ "blk.34.attn_q.weight": 12,
584
+ "blk.34.attn_v.bias": 0,
585
+ "blk.34.attn_v.weight": 12,
586
+ "blk.34.exp_probs_b.bias": 0,
587
+ "blk.34.ffn_down_exps.weight": 20,
588
+ "blk.34.ffn_down_shexp.weight": 20,
589
+ "blk.34.ffn_gate_exps.weight": 10,
590
+ "blk.34.ffn_gate_inp.weight": 0,
591
+ "blk.34.ffn_gate_shexp.weight": 12,
592
+ "blk.34.ffn_up_exps.weight": 10,
593
+ "blk.34.ffn_up_shexp.weight": 12,
594
+ "blk.34.post_attention_norm.weight": 0,
595
+ "blk.35.attn_k.bias": 0,
596
+ "blk.35.attn_k.weight": 12,
597
+ "blk.35.attn_norm.weight": 0,
598
+ "blk.35.attn_output.weight": 14,
599
+ "blk.35.attn_q.bias": 0,
600
+ "blk.35.attn_q.weight": 12,
601
+ "blk.35.attn_v.bias": 0,
602
+ "blk.35.attn_v.weight": 12,
603
+ "blk.35.exp_probs_b.bias": 0,
604
+ "blk.35.ffn_down_exps.weight": 20,
605
+ "blk.35.ffn_down_shexp.weight": 20,
606
+ "blk.35.ffn_gate_exps.weight": 10,
607
+ "blk.35.ffn_gate_inp.weight": 0,
608
+ "blk.35.ffn_gate_shexp.weight": 12,
609
+ "blk.35.ffn_up_exps.weight": 10,
610
+ "blk.35.ffn_up_shexp.weight": 12,
611
+ "blk.35.post_attention_norm.weight": 0,
612
+ "blk.36.attn_k.bias": 0,
613
+ "blk.36.attn_k.weight": 12,
614
+ "blk.36.attn_norm.weight": 0,
615
+ "blk.36.attn_output.weight": 14,
616
+ "blk.36.attn_q.bias": 0,
617
+ "blk.36.attn_q.weight": 12,
618
+ "blk.36.attn_v.bias": 0,
619
+ "blk.36.attn_v.weight": 12,
620
+ "blk.36.exp_probs_b.bias": 0,
621
+ "blk.36.ffn_down_exps.weight": 20,
622
+ "blk.36.ffn_down_shexp.weight": 20,
623
+ "blk.36.ffn_gate_exps.weight": 10,
624
+ "blk.36.ffn_gate_inp.weight": 0,
625
+ "blk.36.ffn_gate_shexp.weight": 12,
626
+ "blk.36.ffn_up_exps.weight": 10,
627
+ "blk.36.ffn_up_shexp.weight": 12,
628
+ "blk.36.post_attention_norm.weight": 0,
629
+ "blk.37.attn_k.bias": 0,
630
+ "blk.37.attn_k.weight": 12,
631
+ "blk.37.attn_norm.weight": 0,
632
+ "blk.37.attn_output.weight": 14,
633
+ "blk.37.attn_q.bias": 0,
634
+ "blk.37.attn_q.weight": 12,
635
+ "blk.37.attn_v.bias": 0,
636
+ "blk.37.attn_v.weight": 12,
637
+ "blk.37.exp_probs_b.bias": 0,
638
+ "blk.37.ffn_down_exps.weight": 20,
639
+ "blk.37.ffn_down_shexp.weight": 20,
640
+ "blk.37.ffn_gate_exps.weight": 10,
641
+ "blk.37.ffn_gate_inp.weight": 0,
642
+ "blk.37.ffn_gate_shexp.weight": 12,
643
+ "blk.37.ffn_up_exps.weight": 10,
644
+ "blk.37.ffn_up_shexp.weight": 12,
645
+ "blk.37.post_attention_norm.weight": 0,
646
+ "blk.38.attn_k.bias": 0,
647
+ "blk.38.attn_k.weight": 12,
648
+ "blk.38.attn_norm.weight": 0,
649
+ "blk.38.attn_output.weight": 12,
650
+ "blk.38.attn_q.bias": 0,
651
+ "blk.38.attn_q.weight": 12,
652
+ "blk.38.attn_v.bias": 0,
653
+ "blk.38.attn_v.weight": 12,
654
+ "blk.38.exp_probs_b.bias": 0,
655
+ "blk.38.ffn_down_exps.weight": 20,
656
+ "blk.38.ffn_down_shexp.weight": 20,
657
+ "blk.38.ffn_gate_exps.weight": 10,
658
+ "blk.38.ffn_gate_inp.weight": 0,
659
+ "blk.38.ffn_gate_shexp.weight": 12,
660
+ "blk.38.ffn_up_exps.weight": 10,
661
+ "blk.38.ffn_up_shexp.weight": 12,
662
+ "blk.38.post_attention_norm.weight": 0,
663
+ "blk.39.attn_k.bias": 0,
664
+ "blk.39.attn_k.weight": 12,
665
+ "blk.39.attn_norm.weight": 0,
666
+ "blk.39.attn_output.weight": 12,
667
+ "blk.39.attn_q.bias": 0,
668
+ "blk.39.attn_q.weight": 12,
669
+ "blk.39.attn_v.bias": 0,
670
+ "blk.39.attn_v.weight": 12,
671
+ "blk.39.exp_probs_b.bias": 0,
672
+ "blk.39.ffn_down_exps.weight": 20,
673
+ "blk.39.ffn_down_shexp.weight": 20,
674
+ "blk.39.ffn_gate_exps.weight": 10,
675
+ "blk.39.ffn_gate_inp.weight": 0,
676
+ "blk.39.ffn_gate_shexp.weight": 12,
677
+ "blk.39.ffn_up_exps.weight": 10,
678
+ "blk.39.ffn_up_shexp.weight": 12,
679
+ "blk.39.post_attention_norm.weight": 0,
680
+ "blk.40.attn_k.bias": 0,
681
+ "blk.40.attn_k.weight": 12,
682
+ "blk.40.attn_norm.weight": 0,
683
+ "blk.40.attn_output.weight": 14,
684
+ "blk.40.attn_q.bias": 0,
685
+ "blk.40.attn_q.weight": 12,
686
+ "blk.40.attn_v.bias": 0,
687
+ "blk.40.attn_v.weight": 12,
688
+ "blk.40.exp_probs_b.bias": 0,
689
+ "blk.40.ffn_down_exps.weight": 20,
690
+ "blk.40.ffn_down_shexp.weight": 20,
691
+ "blk.40.ffn_gate_exps.weight": 10,
692
+ "blk.40.ffn_gate_inp.weight": 0,
693
+ "blk.40.ffn_gate_shexp.weight": 12,
694
+ "blk.40.ffn_up_exps.weight": 10,
695
+ "blk.40.ffn_up_shexp.weight": 12,
696
+ "blk.40.post_attention_norm.weight": 0,
697
+ "blk.41.attn_k.bias": 0,
698
+ "blk.41.attn_k.weight": 12,
699
+ "blk.41.attn_norm.weight": 0,
700
+ "blk.41.attn_output.weight": 12,
701
+ "blk.41.attn_q.bias": 0,
702
+ "blk.41.attn_q.weight": 12,
703
+ "blk.41.attn_v.bias": 0,
704
+ "blk.41.attn_v.weight": 12,
705
+ "blk.41.exp_probs_b.bias": 0,
706
+ "blk.41.ffn_down_exps.weight": 20,
707
+ "blk.41.ffn_down_shexp.weight": 20,
708
+ "blk.41.ffn_gate_exps.weight": 10,
709
+ "blk.41.ffn_gate_inp.weight": 0,
710
+ "blk.41.ffn_gate_shexp.weight": 12,
711
+ "blk.41.ffn_up_exps.weight": 10,
712
+ "blk.41.ffn_up_shexp.weight": 12,
713
+ "blk.41.post_attention_norm.weight": 0,
714
+ "blk.42.attn_k.bias": 0,
715
+ "blk.42.attn_k.weight": 12,
716
+ "blk.42.attn_norm.weight": 0,
717
+ "blk.42.attn_output.weight": 12,
718
+ "blk.42.attn_q.bias": 0,
719
+ "blk.42.attn_q.weight": 12,
720
+ "blk.42.attn_v.bias": 0,
721
+ "blk.42.attn_v.weight": 12,
722
+ "blk.42.exp_probs_b.bias": 0,
723
+ "blk.42.ffn_down_exps.weight": 20,
724
+ "blk.42.ffn_down_shexp.weight": 20,
725
+ "blk.42.ffn_gate_exps.weight": 10,
726
+ "blk.42.ffn_gate_inp.weight": 0,
727
+ "blk.42.ffn_gate_shexp.weight": 12,
728
+ "blk.42.ffn_up_exps.weight": 10,
729
+ "blk.42.ffn_up_shexp.weight": 12,
730
+ "blk.42.post_attention_norm.weight": 0,
731
+ "blk.43.attn_k.bias": 0,
732
+ "blk.43.attn_k.weight": 13,
733
+ "blk.43.attn_norm.weight": 0,
734
+ "blk.43.attn_output.weight": 14,
735
+ "blk.43.attn_q.bias": 0,
736
+ "blk.43.attn_q.weight": 13,
737
+ "blk.43.attn_v.bias": 0,
738
+ "blk.43.attn_v.weight": 13,
739
+ "blk.43.exp_probs_b.bias": 0,
740
+ "blk.43.ffn_down_exps.weight": 20,
741
+ "blk.43.ffn_down_shexp.weight": 20,
742
+ "blk.43.ffn_gate_exps.weight": 10,
743
+ "blk.43.ffn_gate_inp.weight": 0,
744
+ "blk.43.ffn_gate_shexp.weight": 12,
745
+ "blk.43.ffn_up_exps.weight": 10,
746
+ "blk.43.ffn_up_shexp.weight": 12,
747
+ "blk.43.post_attention_norm.weight": 0,
748
+ "blk.44.attn_k.bias": 0,
749
+ "blk.44.attn_k.weight": 12,
750
+ "blk.44.attn_norm.weight": 0,
751
+ "blk.44.attn_output.weight": 12,
752
+ "blk.44.attn_q.bias": 0,
753
+ "blk.44.attn_q.weight": 12,
754
+ "blk.44.attn_v.bias": 0,
755
+ "blk.44.attn_v.weight": 12,
756
+ "blk.44.exp_probs_b.bias": 0,
757
+ "blk.44.ffn_down_exps.weight": 20,
758
+ "blk.44.ffn_down_shexp.weight": 20,
759
+ "blk.44.ffn_gate_exps.weight": 10,
760
+ "blk.44.ffn_gate_inp.weight": 0,
761
+ "blk.44.ffn_gate_shexp.weight": 12,
762
+ "blk.44.ffn_up_exps.weight": 10,
763
+ "blk.44.ffn_up_shexp.weight": 12,
764
+ "blk.44.post_attention_norm.weight": 0,
765
+ "blk.45.attn_k.bias": 0,
766
+ "blk.45.attn_k.weight": 13,
767
+ "blk.45.attn_norm.weight": 0,
768
+ "blk.45.attn_output.weight": 13,
769
+ "blk.45.attn_q.bias": 0,
770
+ "blk.45.attn_q.weight": 13,
771
+ "blk.45.attn_v.bias": 0,
772
+ "blk.45.attn_v.weight": 13,
773
+ "blk.45.exp_probs_b.bias": 0,
774
+ "blk.45.ffn_down_exps.weight": 20,
775
+ "blk.45.ffn_down_shexp.weight": 20,
776
+ "blk.45.ffn_gate_exps.weight": 10,
777
+ "blk.45.ffn_gate_inp.weight": 0,
778
+ "blk.45.ffn_gate_shexp.weight": 12,
779
+ "blk.45.ffn_up_exps.weight": 10,
780
+ "blk.45.ffn_up_shexp.weight": 12,
781
+ "blk.45.post_attention_norm.weight": 0,
782
+ "blk.46.attn_k.bias": 0,
783
+ "blk.46.attn_k.weight": 12,
784
+ "blk.46.attn_norm.weight": 0,
785
+ "blk.46.attn_output.weight": 12,
786
+ "blk.46.attn_q.bias": 0,
787
+ "blk.46.attn_q.weight": 12,
788
+ "blk.46.attn_v.bias": 0,
789
+ "blk.46.attn_v.weight": 12,
790
+ "blk.46.exp_probs_b.bias": 0,
791
+ "blk.46.ffn_down_exps.weight": 6,
792
+ "blk.46.ffn_down_shexp.weight": 6,
793
+ "blk.46.ffn_gate_exps.weight": 10,
794
+ "blk.46.ffn_gate_inp.weight": 0,
795
+ "blk.46.ffn_gate_shexp.weight": 12,
796
+ "blk.46.ffn_up_exps.weight": 10,
797
+ "blk.46.ffn_up_shexp.weight": 12,
798
+ "blk.46.nextn.eh_proj.weight": 10,
799
+ "blk.46.nextn.embed_tokens.weight": 10,
800
+ "blk.46.nextn.enorm.weight": 0,
801
+ "blk.46.nextn.hnorm.weight": 0,
802
+ "blk.46.nextn.shared_head_head.weight": 10,
803
+ "blk.46.nextn.shared_head_norm.weight": 0,
804
+ "blk.46.post_attention_norm.weight": 0
805
+ }