YOYO-AI commited on
Commit
421b110
·
verified ·
1 Parent(s): 4820b57

Upload global_state.json

Browse files
Files changed (1) hide show
  1. global_state.json +1369 -0
global_state.json ADDED
@@ -0,0 +1,1369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": 1,
3
+ "config_hash": "bdbfa26d559d9211e2cf48a38a3ca6c197155a27",
4
+ "run_config": {
5
+ "alpha": 0.1,
6
+ "n_use_samples": 2000,
7
+ "eps_feature": 0.1,
8
+ "layer_band": null,
9
+ "top_r_layers": 8
10
+ },
11
+ "finished_shards": [
12
+ "model.safetensors-00001-of-00004.safetensors",
13
+ "model.safetensors-00002-of-00004.safetensors",
14
+ "model.safetensors-00003-of-00004.safetensors",
15
+ "model.safetensors-00004-of-00004.safetensors"
16
+ ],
17
+ "tensor_stats": {
18
+ "model.language_model.layers.14.mlp.down_proj.weight": {
19
+ "tensor_name": "model.language_model.layers.14.mlp.down_proj.weight",
20
+ "layer": 14,
21
+ "module": "mlp",
22
+ "projection": "down_proj",
23
+ "total_params": 50331648,
24
+ "modified_params": 524288,
25
+ "ratio": 0.010416666666666666,
26
+ "source_layers_count": 8
27
+ },
28
+ "model.language_model.layers.14.mlp.gate_proj.weight": {
29
+ "tensor_name": "model.language_model.layers.14.mlp.gate_proj.weight",
30
+ "layer": 14,
31
+ "module": "mlp",
32
+ "projection": "gate_proj",
33
+ "total_params": 50331648,
34
+ "modified_params": 524288,
35
+ "ratio": 0.010416666666666666,
36
+ "source_layers_count": 8
37
+ },
38
+ "model.language_model.layers.14.mlp.up_proj.weight": {
39
+ "tensor_name": "model.language_model.layers.14.mlp.up_proj.weight",
40
+ "layer": 14,
41
+ "module": "mlp",
42
+ "projection": "up_proj",
43
+ "total_params": 50331648,
44
+ "modified_params": 524288,
45
+ "ratio": 0.010416666666666666,
46
+ "source_layers_count": 8
47
+ },
48
+ "model.language_model.layers.15.mlp.down_proj.weight": {
49
+ "tensor_name": "model.language_model.layers.15.mlp.down_proj.weight",
50
+ "layer": 15,
51
+ "module": "mlp",
52
+ "projection": "down_proj",
53
+ "total_params": 50331648,
54
+ "modified_params": 524288,
55
+ "ratio": 0.010416666666666666,
56
+ "source_layers_count": 8
57
+ },
58
+ "model.language_model.layers.15.mlp.gate_proj.weight": {
59
+ "tensor_name": "model.language_model.layers.15.mlp.gate_proj.weight",
60
+ "layer": 15,
61
+ "module": "mlp",
62
+ "projection": "gate_proj",
63
+ "total_params": 50331648,
64
+ "modified_params": 524288,
65
+ "ratio": 0.010416666666666666,
66
+ "source_layers_count": 8
67
+ },
68
+ "model.language_model.layers.15.mlp.up_proj.weight": {
69
+ "tensor_name": "model.language_model.layers.15.mlp.up_proj.weight",
70
+ "layer": 15,
71
+ "module": "mlp",
72
+ "projection": "up_proj",
73
+ "total_params": 50331648,
74
+ "modified_params": 524288,
75
+ "ratio": 0.010416666666666666,
76
+ "source_layers_count": 8
77
+ },
78
+ "model.language_model.layers.25.mlp.down_proj.weight": {
79
+ "tensor_name": "model.language_model.layers.25.mlp.down_proj.weight",
80
+ "layer": 25,
81
+ "module": "mlp",
82
+ "projection": "down_proj",
83
+ "total_params": 50331648,
84
+ "modified_params": 524288,
85
+ "ratio": 0.010416666666666666,
86
+ "source_layers_count": 8
87
+ },
88
+ "model.language_model.layers.26.mlp.down_proj.weight": {
89
+ "tensor_name": "model.language_model.layers.26.mlp.down_proj.weight",
90
+ "layer": 26,
91
+ "module": "mlp",
92
+ "projection": "down_proj",
93
+ "total_params": 50331648,
94
+ "modified_params": 524288,
95
+ "ratio": 0.010416666666666666,
96
+ "source_layers_count": 8
97
+ },
98
+ "model.language_model.layers.26.mlp.gate_proj.weight": {
99
+ "tensor_name": "model.language_model.layers.26.mlp.gate_proj.weight",
100
+ "layer": 26,
101
+ "module": "mlp",
102
+ "projection": "gate_proj",
103
+ "total_params": 50331648,
104
+ "modified_params": 524288,
105
+ "ratio": 0.010416666666666666,
106
+ "source_layers_count": 8
107
+ },
108
+ "model.language_model.layers.26.mlp.up_proj.weight": {
109
+ "tensor_name": "model.language_model.layers.26.mlp.up_proj.weight",
110
+ "layer": 26,
111
+ "module": "mlp",
112
+ "projection": "up_proj",
113
+ "total_params": 50331648,
114
+ "modified_params": 524288,
115
+ "ratio": 0.010416666666666666,
116
+ "source_layers_count": 8
117
+ },
118
+ "model.language_model.layers.27.mlp.down_proj.weight": {
119
+ "tensor_name": "model.language_model.layers.27.mlp.down_proj.weight",
120
+ "layer": 27,
121
+ "module": "mlp",
122
+ "projection": "down_proj",
123
+ "total_params": 50331648,
124
+ "modified_params": 524288,
125
+ "ratio": 0.010416666666666666,
126
+ "source_layers_count": 8
127
+ },
128
+ "model.language_model.layers.27.mlp.gate_proj.weight": {
129
+ "tensor_name": "model.language_model.layers.27.mlp.gate_proj.weight",
130
+ "layer": 27,
131
+ "module": "mlp",
132
+ "projection": "gate_proj",
133
+ "total_params": 50331648,
134
+ "modified_params": 524288,
135
+ "ratio": 0.010416666666666666,
136
+ "source_layers_count": 8
137
+ },
138
+ "model.language_model.layers.27.mlp.up_proj.weight": {
139
+ "tensor_name": "model.language_model.layers.27.mlp.up_proj.weight",
140
+ "layer": 27,
141
+ "module": "mlp",
142
+ "projection": "up_proj",
143
+ "total_params": 50331648,
144
+ "modified_params": 524288,
145
+ "ratio": 0.010416666666666666,
146
+ "source_layers_count": 8
147
+ },
148
+ "model.language_model.layers.3.mlp.down_proj.weight": {
149
+ "tensor_name": "model.language_model.layers.3.mlp.down_proj.weight",
150
+ "layer": 3,
151
+ "module": "mlp",
152
+ "projection": "down_proj",
153
+ "total_params": 50331648,
154
+ "modified_params": 524288,
155
+ "ratio": 0.010416666666666666,
156
+ "source_layers_count": 8
157
+ },
158
+ "model.language_model.layers.3.mlp.gate_proj.weight": {
159
+ "tensor_name": "model.language_model.layers.3.mlp.gate_proj.weight",
160
+ "layer": 3,
161
+ "module": "mlp",
162
+ "projection": "gate_proj",
163
+ "total_params": 50331648,
164
+ "modified_params": 524288,
165
+ "ratio": 0.010416666666666666,
166
+ "source_layers_count": 8
167
+ },
168
+ "model.language_model.layers.3.mlp.up_proj.weight": {
169
+ "tensor_name": "model.language_model.layers.3.mlp.up_proj.weight",
170
+ "layer": 3,
171
+ "module": "mlp",
172
+ "projection": "up_proj",
173
+ "total_params": 50331648,
174
+ "modified_params": 524288,
175
+ "ratio": 0.010416666666666666,
176
+ "source_layers_count": 8
177
+ },
178
+ "model.language_model.layers.30.mlp.down_proj.weight": {
179
+ "tensor_name": "model.language_model.layers.30.mlp.down_proj.weight",
180
+ "layer": 30,
181
+ "module": "mlp",
182
+ "projection": "down_proj",
183
+ "total_params": 50331648,
184
+ "modified_params": 524288,
185
+ "ratio": 0.010416666666666666,
186
+ "source_layers_count": 8
187
+ },
188
+ "model.language_model.layers.30.mlp.gate_proj.weight": {
189
+ "tensor_name": "model.language_model.layers.30.mlp.gate_proj.weight",
190
+ "layer": 30,
191
+ "module": "mlp",
192
+ "projection": "gate_proj",
193
+ "total_params": 50331648,
194
+ "modified_params": 524288,
195
+ "ratio": 0.010416666666666666,
196
+ "source_layers_count": 8
197
+ },
198
+ "model.language_model.layers.30.mlp.up_proj.weight": {
199
+ "tensor_name": "model.language_model.layers.30.mlp.up_proj.weight",
200
+ "layer": 30,
201
+ "module": "mlp",
202
+ "projection": "up_proj",
203
+ "total_params": 50331648,
204
+ "modified_params": 524288,
205
+ "ratio": 0.010416666666666666,
206
+ "source_layers_count": 8
207
+ },
208
+ "model.language_model.layers.31.mlp.gate_proj.weight": {
209
+ "tensor_name": "model.language_model.layers.31.mlp.gate_proj.weight",
210
+ "layer": 31,
211
+ "module": "mlp",
212
+ "projection": "gate_proj",
213
+ "total_params": 50331648,
214
+ "modified_params": 524288,
215
+ "ratio": 0.010416666666666666,
216
+ "source_layers_count": 8
217
+ },
218
+ "model.language_model.layers.31.mlp.up_proj.weight": {
219
+ "tensor_name": "model.language_model.layers.31.mlp.up_proj.weight",
220
+ "layer": 31,
221
+ "module": "mlp",
222
+ "projection": "up_proj",
223
+ "total_params": 50331648,
224
+ "modified_params": 524288,
225
+ "ratio": 0.010416666666666666,
226
+ "source_layers_count": 8
227
+ },
228
+ "model.language_model.layers.9.mlp.down_proj.weight": {
229
+ "tensor_name": "model.language_model.layers.9.mlp.down_proj.weight",
230
+ "layer": 9,
231
+ "module": "mlp",
232
+ "projection": "down_proj",
233
+ "total_params": 50331648,
234
+ "modified_params": 524288,
235
+ "ratio": 0.010416666666666666,
236
+ "source_layers_count": 8
237
+ },
238
+ "model.language_model.layers.9.mlp.gate_proj.weight": {
239
+ "tensor_name": "model.language_model.layers.9.mlp.gate_proj.weight",
240
+ "layer": 9,
241
+ "module": "mlp",
242
+ "projection": "gate_proj",
243
+ "total_params": 50331648,
244
+ "modified_params": 524288,
245
+ "ratio": 0.010416666666666666,
246
+ "source_layers_count": 8
247
+ },
248
+ "model.language_model.layers.9.mlp.up_proj.weight": {
249
+ "tensor_name": "model.language_model.layers.9.mlp.up_proj.weight",
250
+ "layer": 9,
251
+ "module": "mlp",
252
+ "projection": "up_proj",
253
+ "total_params": 50331648,
254
+ "modified_params": 524288,
255
+ "ratio": 0.010416666666666666,
256
+ "source_layers_count": 8
257
+ },
258
+ "mtp.layers.0.mlp.down_proj.weight": {
259
+ "tensor_name": "mtp.layers.0.mlp.down_proj.weight",
260
+ "layer": 0,
261
+ "module": "mlp",
262
+ "projection": "down_proj",
263
+ "total_params": 50331648,
264
+ "modified_params": 524288,
265
+ "ratio": 0.010416666666666666,
266
+ "source_layers_count": 8
267
+ },
268
+ "mtp.layers.0.mlp.gate_proj.weight": {
269
+ "tensor_name": "mtp.layers.0.mlp.gate_proj.weight",
270
+ "layer": 0,
271
+ "module": "mlp",
272
+ "projection": "gate_proj",
273
+ "total_params": 50331648,
274
+ "modified_params": 524288,
275
+ "ratio": 0.010416666666666666,
276
+ "source_layers_count": 8
277
+ },
278
+ "mtp.layers.0.mlp.up_proj.weight": {
279
+ "tensor_name": "mtp.layers.0.mlp.up_proj.weight",
280
+ "layer": 0,
281
+ "module": "mlp",
282
+ "projection": "up_proj",
283
+ "total_params": 50331648,
284
+ "modified_params": 524288,
285
+ "ratio": 0.010416666666666666,
286
+ "source_layers_count": 8
287
+ },
288
+ "model.language_model.layers.1.mlp.down_proj.weight": {
289
+ "tensor_name": "model.language_model.layers.1.mlp.down_proj.weight",
290
+ "layer": 1,
291
+ "module": "mlp",
292
+ "projection": "down_proj",
293
+ "total_params": 50331648,
294
+ "modified_params": 524288,
295
+ "ratio": 0.010416666666666666,
296
+ "source_layers_count": 8
297
+ },
298
+ "model.language_model.layers.1.mlp.gate_proj.weight": {
299
+ "tensor_name": "model.language_model.layers.1.mlp.gate_proj.weight",
300
+ "layer": 1,
301
+ "module": "mlp",
302
+ "projection": "gate_proj",
303
+ "total_params": 50331648,
304
+ "modified_params": 524288,
305
+ "ratio": 0.010416666666666666,
306
+ "source_layers_count": 8
307
+ },
308
+ "model.language_model.layers.1.mlp.up_proj.weight": {
309
+ "tensor_name": "model.language_model.layers.1.mlp.up_proj.weight",
310
+ "layer": 1,
311
+ "module": "mlp",
312
+ "projection": "up_proj",
313
+ "total_params": 50331648,
314
+ "modified_params": 524288,
315
+ "ratio": 0.010416666666666666,
316
+ "source_layers_count": 8
317
+ },
318
+ "model.language_model.layers.10.mlp.down_proj.weight": {
319
+ "tensor_name": "model.language_model.layers.10.mlp.down_proj.weight",
320
+ "layer": 10,
321
+ "module": "mlp",
322
+ "projection": "down_proj",
323
+ "total_params": 50331648,
324
+ "modified_params": 524288,
325
+ "ratio": 0.010416666666666666,
326
+ "source_layers_count": 8
327
+ },
328
+ "model.language_model.layers.10.mlp.gate_proj.weight": {
329
+ "tensor_name": "model.language_model.layers.10.mlp.gate_proj.weight",
330
+ "layer": 10,
331
+ "module": "mlp",
332
+ "projection": "gate_proj",
333
+ "total_params": 50331648,
334
+ "modified_params": 524288,
335
+ "ratio": 0.010416666666666666,
336
+ "source_layers_count": 8
337
+ },
338
+ "model.language_model.layers.10.mlp.up_proj.weight": {
339
+ "tensor_name": "model.language_model.layers.10.mlp.up_proj.weight",
340
+ "layer": 10,
341
+ "module": "mlp",
342
+ "projection": "up_proj",
343
+ "total_params": 50331648,
344
+ "modified_params": 524288,
345
+ "ratio": 0.010416666666666666,
346
+ "source_layers_count": 8
347
+ },
348
+ "model.language_model.layers.11.mlp.down_proj.weight": {
349
+ "tensor_name": "model.language_model.layers.11.mlp.down_proj.weight",
350
+ "layer": 11,
351
+ "module": "mlp",
352
+ "projection": "down_proj",
353
+ "total_params": 50331648,
354
+ "modified_params": 524288,
355
+ "ratio": 0.010416666666666666,
356
+ "source_layers_count": 8
357
+ },
358
+ "model.language_model.layers.11.mlp.gate_proj.weight": {
359
+ "tensor_name": "model.language_model.layers.11.mlp.gate_proj.weight",
360
+ "layer": 11,
361
+ "module": "mlp",
362
+ "projection": "gate_proj",
363
+ "total_params": 50331648,
364
+ "modified_params": 524288,
365
+ "ratio": 0.010416666666666666,
366
+ "source_layers_count": 8
367
+ },
368
+ "model.language_model.layers.11.mlp.up_proj.weight": {
369
+ "tensor_name": "model.language_model.layers.11.mlp.up_proj.weight",
370
+ "layer": 11,
371
+ "module": "mlp",
372
+ "projection": "up_proj",
373
+ "total_params": 50331648,
374
+ "modified_params": 524288,
375
+ "ratio": 0.010416666666666666,
376
+ "source_layers_count": 8
377
+ },
378
+ "model.language_model.layers.0.mlp.down_proj.weight": {
379
+ "tensor_name": "model.language_model.layers.0.mlp.down_proj.weight",
380
+ "layer": 0,
381
+ "module": "mlp",
382
+ "projection": "down_proj",
383
+ "total_params": 50331648,
384
+ "modified_params": 524288,
385
+ "ratio": 0.010416666666666666,
386
+ "source_layers_count": 8
387
+ },
388
+ "model.language_model.layers.0.mlp.gate_proj.weight": {
389
+ "tensor_name": "model.language_model.layers.0.mlp.gate_proj.weight",
390
+ "layer": 0,
391
+ "module": "mlp",
392
+ "projection": "gate_proj",
393
+ "total_params": 50331648,
394
+ "modified_params": 524288,
395
+ "ratio": 0.010416666666666666,
396
+ "source_layers_count": 8
397
+ },
398
+ "model.language_model.layers.0.mlp.up_proj.weight": {
399
+ "tensor_name": "model.language_model.layers.0.mlp.up_proj.weight",
400
+ "layer": 0,
401
+ "module": "mlp",
402
+ "projection": "up_proj",
403
+ "total_params": 50331648,
404
+ "modified_params": 524288,
405
+ "ratio": 0.010416666666666666,
406
+ "source_layers_count": 8
407
+ },
408
+ "model.language_model.layers.21.mlp.down_proj.weight": {
409
+ "tensor_name": "model.language_model.layers.21.mlp.down_proj.weight",
410
+ "layer": 21,
411
+ "module": "mlp",
412
+ "projection": "down_proj",
413
+ "total_params": 50331648,
414
+ "modified_params": 524288,
415
+ "ratio": 0.010416666666666666,
416
+ "source_layers_count": 8
417
+ },
418
+ "model.language_model.layers.21.mlp.gate_proj.weight": {
419
+ "tensor_name": "model.language_model.layers.21.mlp.gate_proj.weight",
420
+ "layer": 21,
421
+ "module": "mlp",
422
+ "projection": "gate_proj",
423
+ "total_params": 50331648,
424
+ "modified_params": 524288,
425
+ "ratio": 0.010416666666666666,
426
+ "source_layers_count": 8
427
+ },
428
+ "model.language_model.layers.21.mlp.up_proj.weight": {
429
+ "tensor_name": "model.language_model.layers.21.mlp.up_proj.weight",
430
+ "layer": 21,
431
+ "module": "mlp",
432
+ "projection": "up_proj",
433
+ "total_params": 50331648,
434
+ "modified_params": 524288,
435
+ "ratio": 0.010416666666666666,
436
+ "source_layers_count": 8
437
+ },
438
+ "model.language_model.layers.22.mlp.down_proj.weight": {
439
+ "tensor_name": "model.language_model.layers.22.mlp.down_proj.weight",
440
+ "layer": 22,
441
+ "module": "mlp",
442
+ "projection": "down_proj",
443
+ "total_params": 50331648,
444
+ "modified_params": 524288,
445
+ "ratio": 0.010416666666666666,
446
+ "source_layers_count": 8
447
+ },
448
+ "model.language_model.layers.22.mlp.gate_proj.weight": {
449
+ "tensor_name": "model.language_model.layers.22.mlp.gate_proj.weight",
450
+ "layer": 22,
451
+ "module": "mlp",
452
+ "projection": "gate_proj",
453
+ "total_params": 50331648,
454
+ "modified_params": 524288,
455
+ "ratio": 0.010416666666666666,
456
+ "source_layers_count": 8
457
+ },
458
+ "model.language_model.layers.22.mlp.up_proj.weight": {
459
+ "tensor_name": "model.language_model.layers.22.mlp.up_proj.weight",
460
+ "layer": 22,
461
+ "module": "mlp",
462
+ "projection": "up_proj",
463
+ "total_params": 50331648,
464
+ "modified_params": 524288,
465
+ "ratio": 0.010416666666666666,
466
+ "source_layers_count": 8
467
+ },
468
+ "model.language_model.layers.23.mlp.down_proj.weight": {
469
+ "tensor_name": "model.language_model.layers.23.mlp.down_proj.weight",
470
+ "layer": 23,
471
+ "module": "mlp",
472
+ "projection": "down_proj",
473
+ "total_params": 50331648,
474
+ "modified_params": 524288,
475
+ "ratio": 0.010416666666666666,
476
+ "source_layers_count": 8
477
+ },
478
+ "model.language_model.layers.23.mlp.gate_proj.weight": {
479
+ "tensor_name": "model.language_model.layers.23.mlp.gate_proj.weight",
480
+ "layer": 23,
481
+ "module": "mlp",
482
+ "projection": "gate_proj",
483
+ "total_params": 50331648,
484
+ "modified_params": 524288,
485
+ "ratio": 0.010416666666666666,
486
+ "source_layers_count": 8
487
+ },
488
+ "model.language_model.layers.23.mlp.up_proj.weight": {
489
+ "tensor_name": "model.language_model.layers.23.mlp.up_proj.weight",
490
+ "layer": 23,
491
+ "module": "mlp",
492
+ "projection": "up_proj",
493
+ "total_params": 50331648,
494
+ "modified_params": 524288,
495
+ "ratio": 0.010416666666666666,
496
+ "source_layers_count": 8
497
+ },
498
+ "model.language_model.layers.8.mlp.down_proj.weight": {
499
+ "tensor_name": "model.language_model.layers.8.mlp.down_proj.weight",
500
+ "layer": 8,
501
+ "module": "mlp",
502
+ "projection": "down_proj",
503
+ "total_params": 50331648,
504
+ "modified_params": 524288,
505
+ "ratio": 0.010416666666666666,
506
+ "source_layers_count": 8
507
+ },
508
+ "model.language_model.layers.8.mlp.gate_proj.weight": {
509
+ "tensor_name": "model.language_model.layers.8.mlp.gate_proj.weight",
510
+ "layer": 8,
511
+ "module": "mlp",
512
+ "projection": "gate_proj",
513
+ "total_params": 50331648,
514
+ "modified_params": 524288,
515
+ "ratio": 0.010416666666666666,
516
+ "source_layers_count": 8
517
+ },
518
+ "model.language_model.layers.8.mlp.up_proj.weight": {
519
+ "tensor_name": "model.language_model.layers.8.mlp.up_proj.weight",
520
+ "layer": 8,
521
+ "module": "mlp",
522
+ "projection": "up_proj",
523
+ "total_params": 50331648,
524
+ "modified_params": 524288,
525
+ "ratio": 0.010416666666666666,
526
+ "source_layers_count": 8
527
+ },
528
+ "model.language_model.layers.31.mlp.down_proj.weight": {
529
+ "tensor_name": "model.language_model.layers.31.mlp.down_proj.weight",
530
+ "layer": 31,
531
+ "module": "mlp",
532
+ "projection": "down_proj",
533
+ "total_params": 50331648,
534
+ "modified_params": 524288,
535
+ "ratio": 0.010416666666666666,
536
+ "source_layers_count": 8
537
+ },
538
+ "model.language_model.layers.4.mlp.down_proj.weight": {
539
+ "tensor_name": "model.language_model.layers.4.mlp.down_proj.weight",
540
+ "layer": 4,
541
+ "module": "mlp",
542
+ "projection": "down_proj",
543
+ "total_params": 50331648,
544
+ "modified_params": 524288,
545
+ "ratio": 0.010416666666666666,
546
+ "source_layers_count": 8
547
+ },
548
+ "model.language_model.layers.4.mlp.gate_proj.weight": {
549
+ "tensor_name": "model.language_model.layers.4.mlp.gate_proj.weight",
550
+ "layer": 4,
551
+ "module": "mlp",
552
+ "projection": "gate_proj",
553
+ "total_params": 50331648,
554
+ "modified_params": 524288,
555
+ "ratio": 0.010416666666666666,
556
+ "source_layers_count": 8
557
+ },
558
+ "model.language_model.layers.4.mlp.up_proj.weight": {
559
+ "tensor_name": "model.language_model.layers.4.mlp.up_proj.weight",
560
+ "layer": 4,
561
+ "module": "mlp",
562
+ "projection": "up_proj",
563
+ "total_params": 50331648,
564
+ "modified_params": 524288,
565
+ "ratio": 0.010416666666666666,
566
+ "source_layers_count": 8
567
+ },
568
+ "model.language_model.layers.16.mlp.down_proj.weight": {
569
+ "tensor_name": "model.language_model.layers.16.mlp.down_proj.weight",
570
+ "layer": 16,
571
+ "module": "mlp",
572
+ "projection": "down_proj",
573
+ "total_params": 50331648,
574
+ "modified_params": 524288,
575
+ "ratio": 0.010416666666666666,
576
+ "source_layers_count": 8
577
+ },
578
+ "model.language_model.layers.16.mlp.gate_proj.weight": {
579
+ "tensor_name": "model.language_model.layers.16.mlp.gate_proj.weight",
580
+ "layer": 16,
581
+ "module": "mlp",
582
+ "projection": "gate_proj",
583
+ "total_params": 50331648,
584
+ "modified_params": 524288,
585
+ "ratio": 0.010416666666666666,
586
+ "source_layers_count": 8
587
+ },
588
+ "model.language_model.layers.16.mlp.up_proj.weight": {
589
+ "tensor_name": "model.language_model.layers.16.mlp.up_proj.weight",
590
+ "layer": 16,
591
+ "module": "mlp",
592
+ "projection": "up_proj",
593
+ "total_params": 50331648,
594
+ "modified_params": 524288,
595
+ "ratio": 0.010416666666666666,
596
+ "source_layers_count": 8
597
+ },
598
+ "model.language_model.layers.17.mlp.down_proj.weight": {
599
+ "tensor_name": "model.language_model.layers.17.mlp.down_proj.weight",
600
+ "layer": 17,
601
+ "module": "mlp",
602
+ "projection": "down_proj",
603
+ "total_params": 50331648,
604
+ "modified_params": 524288,
605
+ "ratio": 0.010416666666666666,
606
+ "source_layers_count": 8
607
+ },
608
+ "model.language_model.layers.17.mlp.gate_proj.weight": {
609
+ "tensor_name": "model.language_model.layers.17.mlp.gate_proj.weight",
610
+ "layer": 17,
611
+ "module": "mlp",
612
+ "projection": "gate_proj",
613
+ "total_params": 50331648,
614
+ "modified_params": 524288,
615
+ "ratio": 0.010416666666666666,
616
+ "source_layers_count": 8
617
+ },
618
+ "model.language_model.layers.17.mlp.up_proj.weight": {
619
+ "tensor_name": "model.language_model.layers.17.mlp.up_proj.weight",
620
+ "layer": 17,
621
+ "module": "mlp",
622
+ "projection": "up_proj",
623
+ "total_params": 50331648,
624
+ "modified_params": 524288,
625
+ "ratio": 0.010416666666666666,
626
+ "source_layers_count": 8
627
+ },
628
+ "model.language_model.layers.18.mlp.down_proj.weight": {
629
+ "tensor_name": "model.language_model.layers.18.mlp.down_proj.weight",
630
+ "layer": 18,
631
+ "module": "mlp",
632
+ "projection": "down_proj",
633
+ "total_params": 50331648,
634
+ "modified_params": 524288,
635
+ "ratio": 0.010416666666666666,
636
+ "source_layers_count": 8
637
+ },
638
+ "model.language_model.layers.18.mlp.gate_proj.weight": {
639
+ "tensor_name": "model.language_model.layers.18.mlp.gate_proj.weight",
640
+ "layer": 18,
641
+ "module": "mlp",
642
+ "projection": "gate_proj",
643
+ "total_params": 50331648,
644
+ "modified_params": 524288,
645
+ "ratio": 0.010416666666666666,
646
+ "source_layers_count": 8
647
+ },
648
+ "model.language_model.layers.18.mlp.up_proj.weight": {
649
+ "tensor_name": "model.language_model.layers.18.mlp.up_proj.weight",
650
+ "layer": 18,
651
+ "module": "mlp",
652
+ "projection": "up_proj",
653
+ "total_params": 50331648,
654
+ "modified_params": 524288,
655
+ "ratio": 0.010416666666666666,
656
+ "source_layers_count": 8
657
+ },
658
+ "model.language_model.layers.19.mlp.down_proj.weight": {
659
+ "tensor_name": "model.language_model.layers.19.mlp.down_proj.weight",
660
+ "layer": 19,
661
+ "module": "mlp",
662
+ "projection": "down_proj",
663
+ "total_params": 50331648,
664
+ "modified_params": 524288,
665
+ "ratio": 0.010416666666666666,
666
+ "source_layers_count": 8
667
+ },
668
+ "model.language_model.layers.19.mlp.gate_proj.weight": {
669
+ "tensor_name": "model.language_model.layers.19.mlp.gate_proj.weight",
670
+ "layer": 19,
671
+ "module": "mlp",
672
+ "projection": "gate_proj",
673
+ "total_params": 50331648,
674
+ "modified_params": 524288,
675
+ "ratio": 0.010416666666666666,
676
+ "source_layers_count": 8
677
+ },
678
+ "model.language_model.layers.19.mlp.up_proj.weight": {
679
+ "tensor_name": "model.language_model.layers.19.mlp.up_proj.weight",
680
+ "layer": 19,
681
+ "module": "mlp",
682
+ "projection": "up_proj",
683
+ "total_params": 50331648,
684
+ "modified_params": 524288,
685
+ "ratio": 0.010416666666666666,
686
+ "source_layers_count": 8
687
+ },
688
+ "model.language_model.layers.24.mlp.down_proj.weight": {
689
+ "tensor_name": "model.language_model.layers.24.mlp.down_proj.weight",
690
+ "layer": 24,
691
+ "module": "mlp",
692
+ "projection": "down_proj",
693
+ "total_params": 50331648,
694
+ "modified_params": 524288,
695
+ "ratio": 0.010416666666666666,
696
+ "source_layers_count": 8
697
+ },
698
+ "model.language_model.layers.24.mlp.gate_proj.weight": {
699
+ "tensor_name": "model.language_model.layers.24.mlp.gate_proj.weight",
700
+ "layer": 24,
701
+ "module": "mlp",
702
+ "projection": "gate_proj",
703
+ "total_params": 50331648,
704
+ "modified_params": 524288,
705
+ "ratio": 0.010416666666666666,
706
+ "source_layers_count": 8
707
+ },
708
+ "model.language_model.layers.24.mlp.up_proj.weight": {
709
+ "tensor_name": "model.language_model.layers.24.mlp.up_proj.weight",
710
+ "layer": 24,
711
+ "module": "mlp",
712
+ "projection": "up_proj",
713
+ "total_params": 50331648,
714
+ "modified_params": 524288,
715
+ "ratio": 0.010416666666666666,
716
+ "source_layers_count": 8
717
+ },
718
+ "model.language_model.layers.25.mlp.gate_proj.weight": {
719
+ "tensor_name": "model.language_model.layers.25.mlp.gate_proj.weight",
720
+ "layer": 25,
721
+ "module": "mlp",
722
+ "projection": "gate_proj",
723
+ "total_params": 50331648,
724
+ "modified_params": 524288,
725
+ "ratio": 0.010416666666666666,
726
+ "source_layers_count": 8
727
+ },
728
+ "model.language_model.layers.25.mlp.up_proj.weight": {
729
+ "tensor_name": "model.language_model.layers.25.mlp.up_proj.weight",
730
+ "layer": 25,
731
+ "module": "mlp",
732
+ "projection": "up_proj",
733
+ "total_params": 50331648,
734
+ "modified_params": 524288,
735
+ "ratio": 0.010416666666666666,
736
+ "source_layers_count": 8
737
+ },
738
+ "model.language_model.layers.5.mlp.down_proj.weight": {
739
+ "tensor_name": "model.language_model.layers.5.mlp.down_proj.weight",
740
+ "layer": 5,
741
+ "module": "mlp",
742
+ "projection": "down_proj",
743
+ "total_params": 50331648,
744
+ "modified_params": 524288,
745
+ "ratio": 0.010416666666666666,
746
+ "source_layers_count": 8
747
+ },
748
+ "model.language_model.layers.5.mlp.gate_proj.weight": {
749
+ "tensor_name": "model.language_model.layers.5.mlp.gate_proj.weight",
750
+ "layer": 5,
751
+ "module": "mlp",
752
+ "projection": "gate_proj",
753
+ "total_params": 50331648,
754
+ "modified_params": 524288,
755
+ "ratio": 0.010416666666666666,
756
+ "source_layers_count": 8
757
+ },
758
+ "model.language_model.layers.5.mlp.up_proj.weight": {
759
+ "tensor_name": "model.language_model.layers.5.mlp.up_proj.weight",
760
+ "layer": 5,
761
+ "module": "mlp",
762
+ "projection": "up_proj",
763
+ "total_params": 50331648,
764
+ "modified_params": 524288,
765
+ "ratio": 0.010416666666666666,
766
+ "source_layers_count": 8
767
+ },
768
+ "model.language_model.layers.6.mlp.down_proj.weight": {
769
+ "tensor_name": "model.language_model.layers.6.mlp.down_proj.weight",
770
+ "layer": 6,
771
+ "module": "mlp",
772
+ "projection": "down_proj",
773
+ "total_params": 50331648,
774
+ "modified_params": 524288,
775
+ "ratio": 0.010416666666666666,
776
+ "source_layers_count": 8
777
+ },
778
+ "model.language_model.layers.6.mlp.gate_proj.weight": {
779
+ "tensor_name": "model.language_model.layers.6.mlp.gate_proj.weight",
780
+ "layer": 6,
781
+ "module": "mlp",
782
+ "projection": "gate_proj",
783
+ "total_params": 50331648,
784
+ "modified_params": 524288,
785
+ "ratio": 0.010416666666666666,
786
+ "source_layers_count": 8
787
+ },
788
+ "model.language_model.layers.6.mlp.up_proj.weight": {
789
+ "tensor_name": "model.language_model.layers.6.mlp.up_proj.weight",
790
+ "layer": 6,
791
+ "module": "mlp",
792
+ "projection": "up_proj",
793
+ "total_params": 50331648,
794
+ "modified_params": 524288,
795
+ "ratio": 0.010416666666666666,
796
+ "source_layers_count": 8
797
+ },
798
+ "model.language_model.layers.7.mlp.down_proj.weight": {
799
+ "tensor_name": "model.language_model.layers.7.mlp.down_proj.weight",
800
+ "layer": 7,
801
+ "module": "mlp",
802
+ "projection": "down_proj",
803
+ "total_params": 50331648,
804
+ "modified_params": 524288,
805
+ "ratio": 0.010416666666666666,
806
+ "source_layers_count": 8
807
+ },
808
+ "model.language_model.layers.7.mlp.gate_proj.weight": {
809
+ "tensor_name": "model.language_model.layers.7.mlp.gate_proj.weight",
810
+ "layer": 7,
811
+ "module": "mlp",
812
+ "projection": "gate_proj",
813
+ "total_params": 50331648,
814
+ "modified_params": 524288,
815
+ "ratio": 0.010416666666666666,
816
+ "source_layers_count": 8
817
+ },
818
+ "model.language_model.layers.7.mlp.up_proj.weight": {
819
+ "tensor_name": "model.language_model.layers.7.mlp.up_proj.weight",
820
+ "layer": 7,
821
+ "module": "mlp",
822
+ "projection": "up_proj",
823
+ "total_params": 50331648,
824
+ "modified_params": 524288,
825
+ "ratio": 0.010416666666666666,
826
+ "source_layers_count": 8
827
+ },
828
+ "model.language_model.layers.2.mlp.down_proj.weight": {
829
+ "tensor_name": "model.language_model.layers.2.mlp.down_proj.weight",
830
+ "layer": 2,
831
+ "module": "mlp",
832
+ "projection": "down_proj",
833
+ "total_params": 50331648,
834
+ "modified_params": 524288,
835
+ "ratio": 0.010416666666666666,
836
+ "source_layers_count": 8
837
+ },
838
+ "model.language_model.layers.2.mlp.gate_proj.weight": {
839
+ "tensor_name": "model.language_model.layers.2.mlp.gate_proj.weight",
840
+ "layer": 2,
841
+ "module": "mlp",
842
+ "projection": "gate_proj",
843
+ "total_params": 50331648,
844
+ "modified_params": 524288,
845
+ "ratio": 0.010416666666666666,
846
+ "source_layers_count": 8
847
+ },
848
+ "model.language_model.layers.2.mlp.up_proj.weight": {
849
+ "tensor_name": "model.language_model.layers.2.mlp.up_proj.weight",
850
+ "layer": 2,
851
+ "module": "mlp",
852
+ "projection": "up_proj",
853
+ "total_params": 50331648,
854
+ "modified_params": 524288,
855
+ "ratio": 0.010416666666666666,
856
+ "source_layers_count": 8
857
+ },
858
+ "model.language_model.layers.20.mlp.down_proj.weight": {
859
+ "tensor_name": "model.language_model.layers.20.mlp.down_proj.weight",
860
+ "layer": 20,
861
+ "module": "mlp",
862
+ "projection": "down_proj",
863
+ "total_params": 50331648,
864
+ "modified_params": 524288,
865
+ "ratio": 0.010416666666666666,
866
+ "source_layers_count": 8
867
+ },
868
+ "model.language_model.layers.20.mlp.gate_proj.weight": {
869
+ "tensor_name": "model.language_model.layers.20.mlp.gate_proj.weight",
870
+ "layer": 20,
871
+ "module": "mlp",
872
+ "projection": "gate_proj",
873
+ "total_params": 50331648,
874
+ "modified_params": 524288,
875
+ "ratio": 0.010416666666666666,
876
+ "source_layers_count": 8
877
+ },
878
+ "model.language_model.layers.20.mlp.up_proj.weight": {
879
+ "tensor_name": "model.language_model.layers.20.mlp.up_proj.weight",
880
+ "layer": 20,
881
+ "module": "mlp",
882
+ "projection": "up_proj",
883
+ "total_params": 50331648,
884
+ "modified_params": 524288,
885
+ "ratio": 0.010416666666666666,
886
+ "source_layers_count": 8
887
+ },
888
+ "model.language_model.layers.28.mlp.down_proj.weight": {
889
+ "tensor_name": "model.language_model.layers.28.mlp.down_proj.weight",
890
+ "layer": 28,
891
+ "module": "mlp",
892
+ "projection": "down_proj",
893
+ "total_params": 50331648,
894
+ "modified_params": 524288,
895
+ "ratio": 0.010416666666666666,
896
+ "source_layers_count": 8
897
+ },
898
+ "model.language_model.layers.28.mlp.gate_proj.weight": {
899
+ "tensor_name": "model.language_model.layers.28.mlp.gate_proj.weight",
900
+ "layer": 28,
901
+ "module": "mlp",
902
+ "projection": "gate_proj",
903
+ "total_params": 50331648,
904
+ "modified_params": 524288,
905
+ "ratio": 0.010416666666666666,
906
+ "source_layers_count": 8
907
+ },
908
+ "model.language_model.layers.28.mlp.up_proj.weight": {
909
+ "tensor_name": "model.language_model.layers.28.mlp.up_proj.weight",
910
+ "layer": 28,
911
+ "module": "mlp",
912
+ "projection": "up_proj",
913
+ "total_params": 50331648,
914
+ "modified_params": 524288,
915
+ "ratio": 0.010416666666666666,
916
+ "source_layers_count": 8
917
+ },
918
+ "model.language_model.layers.29.mlp.down_proj.weight": {
919
+ "tensor_name": "model.language_model.layers.29.mlp.down_proj.weight",
920
+ "layer": 29,
921
+ "module": "mlp",
922
+ "projection": "down_proj",
923
+ "total_params": 50331648,
924
+ "modified_params": 524288,
925
+ "ratio": 0.010416666666666666,
926
+ "source_layers_count": 8
927
+ },
928
+ "model.language_model.layers.29.mlp.gate_proj.weight": {
929
+ "tensor_name": "model.language_model.layers.29.mlp.gate_proj.weight",
930
+ "layer": 29,
931
+ "module": "mlp",
932
+ "projection": "gate_proj",
933
+ "total_params": 50331648,
934
+ "modified_params": 524288,
935
+ "ratio": 0.010416666666666666,
936
+ "source_layers_count": 8
937
+ },
938
+ "model.language_model.layers.29.mlp.up_proj.weight": {
939
+ "tensor_name": "model.language_model.layers.29.mlp.up_proj.weight",
940
+ "layer": 29,
941
+ "module": "mlp",
942
+ "projection": "up_proj",
943
+ "total_params": 50331648,
944
+ "modified_params": 524288,
945
+ "ratio": 0.010416666666666666,
946
+ "source_layers_count": 8
947
+ },
948
+ "model.language_model.layers.12.mlp.down_proj.weight": {
949
+ "tensor_name": "model.language_model.layers.12.mlp.down_proj.weight",
950
+ "layer": 12,
951
+ "module": "mlp",
952
+ "projection": "down_proj",
953
+ "total_params": 50331648,
954
+ "modified_params": 524288,
955
+ "ratio": 0.010416666666666666,
956
+ "source_layers_count": 8
957
+ },
958
+ "model.language_model.layers.12.mlp.gate_proj.weight": {
959
+ "tensor_name": "model.language_model.layers.12.mlp.gate_proj.weight",
960
+ "layer": 12,
961
+ "module": "mlp",
962
+ "projection": "gate_proj",
963
+ "total_params": 50331648,
964
+ "modified_params": 524288,
965
+ "ratio": 0.010416666666666666,
966
+ "source_layers_count": 8
967
+ },
968
+ "model.language_model.layers.12.mlp.up_proj.weight": {
969
+ "tensor_name": "model.language_model.layers.12.mlp.up_proj.weight",
970
+ "layer": 12,
971
+ "module": "mlp",
972
+ "projection": "up_proj",
973
+ "total_params": 50331648,
974
+ "modified_params": 524288,
975
+ "ratio": 0.010416666666666666,
976
+ "source_layers_count": 8
977
+ },
978
+ "model.language_model.layers.13.mlp.down_proj.weight": {
979
+ "tensor_name": "model.language_model.layers.13.mlp.down_proj.weight",
980
+ "layer": 13,
981
+ "module": "mlp",
982
+ "projection": "down_proj",
983
+ "total_params": 50331648,
984
+ "modified_params": 524288,
985
+ "ratio": 0.010416666666666666,
986
+ "source_layers_count": 8
987
+ },
988
+ "model.language_model.layers.13.mlp.gate_proj.weight": {
989
+ "tensor_name": "model.language_model.layers.13.mlp.gate_proj.weight",
990
+ "layer": 13,
991
+ "module": "mlp",
992
+ "projection": "gate_proj",
993
+ "total_params": 50331648,
994
+ "modified_params": 524288,
995
+ "ratio": 0.010416666666666666,
996
+ "source_layers_count": 8
997
+ },
998
+ "model.language_model.layers.13.mlp.up_proj.weight": {
999
+ "tensor_name": "model.language_model.layers.13.mlp.up_proj.weight",
1000
+ "layer": 13,
1001
+ "module": "mlp",
1002
+ "projection": "up_proj",
1003
+ "total_params": 50331648,
1004
+ "modified_params": 524288,
1005
+ "ratio": 0.010416666666666666,
1006
+ "source_layers_count": 8
1007
+ },
1008
+ "model.language_model.layers.15.self_attn.q_proj.weight": {
1009
+ "tensor_name": "model.language_model.layers.15.self_attn.q_proj.weight",
1010
+ "layer": 15,
1011
+ "module": "attn",
1012
+ "projection": "q_proj",
1013
+ "total_params": 33554432,
1014
+ "modified_params": 524288,
1015
+ "ratio": 0.015625,
1016
+ "source_layers_count": 8
1017
+ },
1018
+ "model.language_model.layers.27.self_attn.q_proj.weight": {
1019
+ "tensor_name": "model.language_model.layers.27.self_attn.q_proj.weight",
1020
+ "layer": 27,
1021
+ "module": "attn",
1022
+ "projection": "q_proj",
1023
+ "total_params": 33554432,
1024
+ "modified_params": 524288,
1025
+ "ratio": 0.015625,
1026
+ "source_layers_count": 8
1027
+ },
1028
+ "model.language_model.layers.3.self_attn.q_proj.weight": {
1029
+ "tensor_name": "model.language_model.layers.3.self_attn.q_proj.weight",
1030
+ "layer": 3,
1031
+ "module": "attn",
1032
+ "projection": "q_proj",
1033
+ "total_params": 33554432,
1034
+ "modified_params": 524288,
1035
+ "ratio": 0.015625,
1036
+ "source_layers_count": 8
1037
+ },
1038
+ "mtp.layers.0.self_attn.q_proj.weight": {
1039
+ "tensor_name": "mtp.layers.0.self_attn.q_proj.weight",
1040
+ "layer": 0,
1041
+ "module": "attn",
1042
+ "projection": "q_proj",
1043
+ "total_params": 33554432,
1044
+ "modified_params": 524288,
1045
+ "ratio": 0.015625,
1046
+ "source_layers_count": 8
1047
+ },
1048
+ "model.language_model.layers.11.self_attn.q_proj.weight": {
1049
+ "tensor_name": "model.language_model.layers.11.self_attn.q_proj.weight",
1050
+ "layer": 11,
1051
+ "module": "attn",
1052
+ "projection": "q_proj",
1053
+ "total_params": 33554432,
1054
+ "modified_params": 524288,
1055
+ "ratio": 0.015625,
1056
+ "source_layers_count": 8
1057
+ },
1058
+ "model.language_model.layers.23.self_attn.q_proj.weight": {
1059
+ "tensor_name": "model.language_model.layers.23.self_attn.q_proj.weight",
1060
+ "layer": 23,
1061
+ "module": "attn",
1062
+ "projection": "q_proj",
1063
+ "total_params": 33554432,
1064
+ "modified_params": 524288,
1065
+ "ratio": 0.015625,
1066
+ "source_layers_count": 8
1067
+ },
1068
+ "model.language_model.layers.7.self_attn.q_proj.weight": {
1069
+ "tensor_name": "model.language_model.layers.7.self_attn.q_proj.weight",
1070
+ "layer": 7,
1071
+ "module": "attn",
1072
+ "projection": "q_proj",
1073
+ "total_params": 33554432,
1074
+ "modified_params": 524288,
1075
+ "ratio": 0.015625,
1076
+ "source_layers_count": 8
1077
+ },
1078
+ "model.language_model.layers.31.self_attn.q_proj.weight": {
1079
+ "tensor_name": "model.language_model.layers.31.self_attn.q_proj.weight",
1080
+ "layer": 31,
1081
+ "module": "attn",
1082
+ "projection": "q_proj",
1083
+ "total_params": 33554432,
1084
+ "modified_params": 524288,
1085
+ "ratio": 0.015625,
1086
+ "source_layers_count": 8
1087
+ },
1088
+ "model.language_model.layers.19.self_attn.q_proj.weight": {
1089
+ "tensor_name": "model.language_model.layers.19.self_attn.q_proj.weight",
1090
+ "layer": 19,
1091
+ "module": "attn",
1092
+ "projection": "q_proj",
1093
+ "total_params": 33554432,
1094
+ "modified_params": 524288,
1095
+ "ratio": 0.015625,
1096
+ "source_layers_count": 8
1097
+ },
1098
+ "model.language_model.layers.15.self_attn.o_proj.weight": {
1099
+ "tensor_name": "model.language_model.layers.15.self_attn.o_proj.weight",
1100
+ "layer": 15,
1101
+ "module": "attn",
1102
+ "projection": "o_proj",
1103
+ "total_params": 16777216,
1104
+ "modified_params": 524288,
1105
+ "ratio": 0.03125,
1106
+ "source_layers_count": 8
1107
+ },
1108
+ "model.language_model.layers.27.self_attn.o_proj.weight": {
1109
+ "tensor_name": "model.language_model.layers.27.self_attn.o_proj.weight",
1110
+ "layer": 27,
1111
+ "module": "attn",
1112
+ "projection": "o_proj",
1113
+ "total_params": 16777216,
1114
+ "modified_params": 524288,
1115
+ "ratio": 0.03125,
1116
+ "source_layers_count": 8
1117
+ },
1118
+ "model.language_model.layers.3.self_attn.o_proj.weight": {
1119
+ "tensor_name": "model.language_model.layers.3.self_attn.o_proj.weight",
1120
+ "layer": 3,
1121
+ "module": "attn",
1122
+ "projection": "o_proj",
1123
+ "total_params": 16777216,
1124
+ "modified_params": 524288,
1125
+ "ratio": 0.03125,
1126
+ "source_layers_count": 8
1127
+ },
1128
+ "mtp.layers.0.self_attn.o_proj.weight": {
1129
+ "tensor_name": "mtp.layers.0.self_attn.o_proj.weight",
1130
+ "layer": 0,
1131
+ "module": "attn",
1132
+ "projection": "o_proj",
1133
+ "total_params": 16777216,
1134
+ "modified_params": 524288,
1135
+ "ratio": 0.03125,
1136
+ "source_layers_count": 8
1137
+ },
1138
+ "model.language_model.layers.11.self_attn.o_proj.weight": {
1139
+ "tensor_name": "model.language_model.layers.11.self_attn.o_proj.weight",
1140
+ "layer": 11,
1141
+ "module": "attn",
1142
+ "projection": "o_proj",
1143
+ "total_params": 16777216,
1144
+ "modified_params": 524288,
1145
+ "ratio": 0.03125,
1146
+ "source_layers_count": 8
1147
+ },
1148
+ "model.language_model.layers.23.self_attn.o_proj.weight": {
1149
+ "tensor_name": "model.language_model.layers.23.self_attn.o_proj.weight",
1150
+ "layer": 23,
1151
+ "module": "attn",
1152
+ "projection": "o_proj",
1153
+ "total_params": 16777216,
1154
+ "modified_params": 524288,
1155
+ "ratio": 0.03125,
1156
+ "source_layers_count": 8
1157
+ },
1158
+ "model.language_model.layers.7.self_attn.o_proj.weight": {
1159
+ "tensor_name": "model.language_model.layers.7.self_attn.o_proj.weight",
1160
+ "layer": 7,
1161
+ "module": "attn",
1162
+ "projection": "o_proj",
1163
+ "total_params": 16777216,
1164
+ "modified_params": 524288,
1165
+ "ratio": 0.03125,
1166
+ "source_layers_count": 8
1167
+ },
1168
+ "model.language_model.layers.31.self_attn.o_proj.weight": {
1169
+ "tensor_name": "model.language_model.layers.31.self_attn.o_proj.weight",
1170
+ "layer": 31,
1171
+ "module": "attn",
1172
+ "projection": "o_proj",
1173
+ "total_params": 16777216,
1174
+ "modified_params": 524288,
1175
+ "ratio": 0.03125,
1176
+ "source_layers_count": 8
1177
+ },
1178
+ "model.language_model.layers.19.self_attn.o_proj.weight": {
1179
+ "tensor_name": "model.language_model.layers.19.self_attn.o_proj.weight",
1180
+ "layer": 19,
1181
+ "module": "attn",
1182
+ "projection": "o_proj",
1183
+ "total_params": 16777216,
1184
+ "modified_params": 524288,
1185
+ "ratio": 0.03125,
1186
+ "source_layers_count": 8
1187
+ },
1188
+ "model.language_model.layers.15.self_attn.k_proj.weight": {
1189
+ "tensor_name": "model.language_model.layers.15.self_attn.k_proj.weight",
1190
+ "layer": 15,
1191
+ "module": "attn",
1192
+ "projection": "k_proj",
1193
+ "total_params": 4194304,
1194
+ "modified_params": 524288,
1195
+ "ratio": 0.125,
1196
+ "source_layers_count": 8
1197
+ },
1198
+ "model.language_model.layers.15.self_attn.v_proj.weight": {
1199
+ "tensor_name": "model.language_model.layers.15.self_attn.v_proj.weight",
1200
+ "layer": 15,
1201
+ "module": "attn",
1202
+ "projection": "v_proj",
1203
+ "total_params": 4194304,
1204
+ "modified_params": 524288,
1205
+ "ratio": 0.125,
1206
+ "source_layers_count": 8
1207
+ },
1208
+ "model.language_model.layers.27.self_attn.k_proj.weight": {
1209
+ "tensor_name": "model.language_model.layers.27.self_attn.k_proj.weight",
1210
+ "layer": 27,
1211
+ "module": "attn",
1212
+ "projection": "k_proj",
1213
+ "total_params": 4194304,
1214
+ "modified_params": 524288,
1215
+ "ratio": 0.125,
1216
+ "source_layers_count": 8
1217
+ },
1218
+ "model.language_model.layers.27.self_attn.v_proj.weight": {
1219
+ "tensor_name": "model.language_model.layers.27.self_attn.v_proj.weight",
1220
+ "layer": 27,
1221
+ "module": "attn",
1222
+ "projection": "v_proj",
1223
+ "total_params": 4194304,
1224
+ "modified_params": 524288,
1225
+ "ratio": 0.125,
1226
+ "source_layers_count": 8
1227
+ },
1228
+ "model.language_model.layers.3.self_attn.k_proj.weight": {
1229
+ "tensor_name": "model.language_model.layers.3.self_attn.k_proj.weight",
1230
+ "layer": 3,
1231
+ "module": "attn",
1232
+ "projection": "k_proj",
1233
+ "total_params": 4194304,
1234
+ "modified_params": 524288,
1235
+ "ratio": 0.125,
1236
+ "source_layers_count": 8
1237
+ },
1238
+ "model.language_model.layers.3.self_attn.v_proj.weight": {
1239
+ "tensor_name": "model.language_model.layers.3.self_attn.v_proj.weight",
1240
+ "layer": 3,
1241
+ "module": "attn",
1242
+ "projection": "v_proj",
1243
+ "total_params": 4194304,
1244
+ "modified_params": 524288,
1245
+ "ratio": 0.125,
1246
+ "source_layers_count": 8
1247
+ },
1248
+ "mtp.layers.0.self_attn.k_proj.weight": {
1249
+ "tensor_name": "mtp.layers.0.self_attn.k_proj.weight",
1250
+ "layer": 0,
1251
+ "module": "attn",
1252
+ "projection": "k_proj",
1253
+ "total_params": 4194304,
1254
+ "modified_params": 524288,
1255
+ "ratio": 0.125,
1256
+ "source_layers_count": 8
1257
+ },
1258
+ "mtp.layers.0.self_attn.v_proj.weight": {
1259
+ "tensor_name": "mtp.layers.0.self_attn.v_proj.weight",
1260
+ "layer": 0,
1261
+ "module": "attn",
1262
+ "projection": "v_proj",
1263
+ "total_params": 4194304,
1264
+ "modified_params": 524288,
1265
+ "ratio": 0.125,
1266
+ "source_layers_count": 8
1267
+ },
1268
+ "model.language_model.layers.11.self_attn.k_proj.weight": {
1269
+ "tensor_name": "model.language_model.layers.11.self_attn.k_proj.weight",
1270
+ "layer": 11,
1271
+ "module": "attn",
1272
+ "projection": "k_proj",
1273
+ "total_params": 4194304,
1274
+ "modified_params": 524288,
1275
+ "ratio": 0.125,
1276
+ "source_layers_count": 8
1277
+ },
1278
+ "model.language_model.layers.11.self_attn.v_proj.weight": {
1279
+ "tensor_name": "model.language_model.layers.11.self_attn.v_proj.weight",
1280
+ "layer": 11,
1281
+ "module": "attn",
1282
+ "projection": "v_proj",
1283
+ "total_params": 4194304,
1284
+ "modified_params": 524288,
1285
+ "ratio": 0.125,
1286
+ "source_layers_count": 8
1287
+ },
1288
+ "model.language_model.layers.23.self_attn.k_proj.weight": {
1289
+ "tensor_name": "model.language_model.layers.23.self_attn.k_proj.weight",
1290
+ "layer": 23,
1291
+ "module": "attn",
1292
+ "projection": "k_proj",
1293
+ "total_params": 4194304,
1294
+ "modified_params": 524288,
1295
+ "ratio": 0.125,
1296
+ "source_layers_count": 8
1297
+ },
1298
+ "model.language_model.layers.23.self_attn.v_proj.weight": {
1299
+ "tensor_name": "model.language_model.layers.23.self_attn.v_proj.weight",
1300
+ "layer": 23,
1301
+ "module": "attn",
1302
+ "projection": "v_proj",
1303
+ "total_params": 4194304,
1304
+ "modified_params": 524288,
1305
+ "ratio": 0.125,
1306
+ "source_layers_count": 8
1307
+ },
1308
+ "model.language_model.layers.7.self_attn.k_proj.weight": {
1309
+ "tensor_name": "model.language_model.layers.7.self_attn.k_proj.weight",
1310
+ "layer": 7,
1311
+ "module": "attn",
1312
+ "projection": "k_proj",
1313
+ "total_params": 4194304,
1314
+ "modified_params": 524288,
1315
+ "ratio": 0.125,
1316
+ "source_layers_count": 8
1317
+ },
1318
+ "model.language_model.layers.7.self_attn.v_proj.weight": {
1319
+ "tensor_name": "model.language_model.layers.7.self_attn.v_proj.weight",
1320
+ "layer": 7,
1321
+ "module": "attn",
1322
+ "projection": "v_proj",
1323
+ "total_params": 4194304,
1324
+ "modified_params": 524288,
1325
+ "ratio": 0.125,
1326
+ "source_layers_count": 8
1327
+ },
1328
+ "model.language_model.layers.31.self_attn.k_proj.weight": {
1329
+ "tensor_name": "model.language_model.layers.31.self_attn.k_proj.weight",
1330
+ "layer": 31,
1331
+ "module": "attn",
1332
+ "projection": "k_proj",
1333
+ "total_params": 4194304,
1334
+ "modified_params": 524288,
1335
+ "ratio": 0.125,
1336
+ "source_layers_count": 8
1337
+ },
1338
+ "model.language_model.layers.31.self_attn.v_proj.weight": {
1339
+ "tensor_name": "model.language_model.layers.31.self_attn.v_proj.weight",
1340
+ "layer": 31,
1341
+ "module": "attn",
1342
+ "projection": "v_proj",
1343
+ "total_params": 4194304,
1344
+ "modified_params": 524288,
1345
+ "ratio": 0.125,
1346
+ "source_layers_count": 8
1347
+ },
1348
+ "model.language_model.layers.19.self_attn.k_proj.weight": {
1349
+ "tensor_name": "model.language_model.layers.19.self_attn.k_proj.weight",
1350
+ "layer": 19,
1351
+ "module": "attn",
1352
+ "projection": "k_proj",
1353
+ "total_params": 4194304,
1354
+ "modified_params": 524288,
1355
+ "ratio": 0.125,
1356
+ "source_layers_count": 8
1357
+ },
1358
+ "model.language_model.layers.19.self_attn.v_proj.weight": {
1359
+ "tensor_name": "model.language_model.layers.19.self_attn.v_proj.weight",
1360
+ "layer": 19,
1361
+ "module": "attn",
1362
+ "projection": "v_proj",
1363
+ "total_params": 4194304,
1364
+ "modified_params": 524288,
1365
+ "ratio": 0.125,
1366
+ "source_layers_count": 8
1367
+ }
1368
+ }
1369
+ }