File size: 11,667 Bytes
b5a6aa7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,damp,time
0,self_attn.k_proj,0.0000003594,0.05000,4.072
0,self_attn.v_proj,0.0000003196,0.05000,4.159
0,self_attn.q_proj,0.0000013323,0.05000,4.189
0,self_attn.o_proj,0.0000008583,0.05000,1.501
0,mlp.gate_proj,0.0000935746,0.05000,2.208
0,mlp.up_proj,0.0000823376,0.05000,2.229
0,mlp.down_proj,0.0000093483,0.05000,3.781
1,self_attn.k_proj,0.0000005827,0.05000,4.169
1,self_attn.q_proj,0.0000022050,0.05000,4.270
1,self_attn.v_proj,0.0000006061,0.05000,4.286
1,self_attn.o_proj,0.0000013489,0.05000,1.449
1,mlp.up_proj,0.0012403375,0.05000,1.993
1,mlp.gate_proj,0.0027883543,0.05000,2.016
1,mlp.down_proj,0.0000149858,0.05000,3.810
2,self_attn.k_proj,0.0000017745,0.05000,4.243
2,self_attn.q_proj,0.0000062511,0.05000,4.262
2,self_attn.v_proj,0.0000017546,0.05000,4.358
2,self_attn.o_proj,0.0000015581,0.05000,1.491
2,mlp.up_proj,0.0037729626,0.05000,2.027
2,mlp.gate_proj,0.0049998074,0.05000,2.042
2,mlp.down_proj,0.0000086235,0.05000,3.796
3,self_attn.k_proj,0.0000031761,0.05000,4.226
3,self_attn.q_proj,0.0000124558,0.05000,4.254
3,self_attn.v_proj,0.0000033006,0.05000,4.299
3,self_attn.o_proj,0.0000027714,0.05000,1.471
3,mlp.up_proj,0.0025942083,0.05000,2.045
3,mlp.gate_proj,0.0050245722,0.05000,2.055
3,mlp.down_proj,0.0000275649,0.05000,3.933
4,self_attn.q_proj,0.0000246425,0.05000,4.286
4,self_attn.k_proj,0.0000067264,0.05000,4.288
4,self_attn.v_proj,0.0000068391,0.05000,4.351
4,self_attn.o_proj,0.0000045797,0.05000,1.461
4,mlp.gate_proj,0.0047045770,0.05000,2.064
4,mlp.up_proj,0.0019233117,0.05000,2.079
4,mlp.down_proj,0.0000489517,0.05000,3.845
5,self_attn.v_proj,0.0000077134,0.05000,4.167
5,self_attn.q_proj,0.0000278327,0.05000,4.278
5,self_attn.k_proj,0.0000069916,0.05000,4.305
5,self_attn.o_proj,0.0000070910,0.05000,1.466
5,mlp.up_proj,0.0008793870,0.05000,1.962
5,mlp.gate_proj,0.0017394822,0.05000,1.991
5,mlp.down_proj,0.0000607402,0.05000,3.847
6,self_attn.v_proj,0.0000168110,0.05000,4.083
6,self_attn.k_proj,0.0000146507,0.05000,4.192
6,self_attn.q_proj,0.0000625633,0.05000,4.207
6,self_attn.o_proj,0.0000228399,0.05000,1.457
6,mlp.up_proj,0.0011648920,0.05000,1.999
6,mlp.gate_proj,0.0019183903,0.05000,2.030
6,mlp.down_proj,0.0103364748,0.05000,3.803
7,self_attn.q_proj,0.0001241763,0.05000,4.408
7,self_attn.v_proj,0.0000359717,0.05000,4.429
7,self_attn.k_proj,0.0000332411,0.05000,4.472
7,self_attn.o_proj,0.0000210265,0.05000,1.481
7,mlp.up_proj,0.0013953543,0.05000,2.171
7,mlp.gate_proj,0.0021643315,0.05000,2.191
7,mlp.down_proj,0.0000991852,0.05000,3.856
8,self_attn.q_proj,0.0001985875,0.05000,4.326
8,self_attn.v_proj,0.0000591547,0.05000,4.352
8,self_attn.k_proj,0.0000495564,0.05000,4.384
8,self_attn.o_proj,0.0000302952,0.05000,1.425
8,mlp.gate_proj,0.0016333003,0.05000,2.126
8,mlp.up_proj,0.0013568534,0.05000,2.144
8,mlp.down_proj,0.0001514721,0.05000,3.858
9,self_attn.q_proj,0.0002371635,0.05000,4.318
9,self_attn.k_proj,0.0000649721,0.05000,4.368
9,self_attn.v_proj,0.0000708001,0.05000,4.392
9,self_attn.o_proj,0.0000351153,0.05000,1.461
9,mlp.gate_proj,0.0026187571,0.05000,2.003
9,mlp.up_proj,0.0017171524,0.05000,2.035
9,mlp.down_proj,0.0001497422,0.05000,3.754
10,self_attn.q_proj,0.0003617231,0.05000,4.318
10,self_attn.k_proj,0.0000914477,0.05000,4.385
10,self_attn.v_proj,0.0001139594,0.05000,4.396
10,self_attn.o_proj,0.0000542123,0.05000,1.558
10,mlp.up_proj,0.0014414165,0.05000,2.250
10,mlp.gate_proj,0.0020228552,0.05000,2.267
10,mlp.down_proj,0.0001341613,0.05000,3.767
11,self_attn.q_proj,0.0001707963,0.05000,4.467
11,self_attn.v_proj,0.0000511366,0.05000,4.515
11,self_attn.k_proj,0.0000445927,0.05000,4.552
11,self_attn.o_proj,0.0000347715,0.05000,1.577
11,mlp.gate_proj,0.0016375363,0.05000,2.061
11,mlp.up_proj,0.0013137128,0.05000,2.088
11,mlp.down_proj,0.0001213440,0.05000,3.846
12,self_attn.q_proj,0.0001992994,0.05000,4.345
12,self_attn.v_proj,0.0000592262,0.05000,4.403
12,self_attn.k_proj,0.0000508667,0.05000,4.437
12,self_attn.o_proj,0.0000389608,0.05000,1.487
12,mlp.up_proj,0.0012556559,0.05000,2.126
12,mlp.gate_proj,0.0014193900,0.05000,2.153
12,mlp.down_proj,0.0001204108,0.05000,3.814
13,self_attn.k_proj,0.0000377959,0.05000,4.361
13,self_attn.v_proj,0.0000398980,0.05000,4.377
13,self_attn.q_proj,0.0001468130,0.05000,4.406
13,self_attn.o_proj,0.0000275558,0.05000,1.572
13,mlp.gate_proj,0.0013068827,0.05000,1.982
13,mlp.up_proj,0.0012688375,0.05000,1.992
13,mlp.down_proj,0.0001261197,0.05000,3.786
14,self_attn.k_proj,0.0000636807,0.05000,4.260
14,self_attn.q_proj,0.0002550139,0.05000,4.352
14,self_attn.v_proj,0.0000732032,0.05000,4.377
14,self_attn.o_proj,0.0000563386,0.05000,1.452
14,mlp.gate_proj,0.0012963092,0.05000,2.037
14,mlp.up_proj,0.0012702085,0.05000,2.074
14,mlp.down_proj,0.0001155589,0.05000,3.801
15,self_attn.k_proj,0.0000619257,0.05000,4.268
15,self_attn.v_proj,0.0000667899,0.05000,4.398
15,self_attn.q_proj,0.0002482918,0.05000,4.422
15,self_attn.o_proj,0.0000455493,0.05000,1.434
15,mlp.gate_proj,0.0011929697,0.05000,2.083
15,mlp.up_proj,0.0012112681,0.05000,2.113
15,mlp.down_proj,0.0001076081,0.05000,3.777
16,self_attn.v_proj,0.0001263295,0.05000,4.350
16,self_attn.q_proj,0.0004257370,0.05000,4.410
16,self_attn.k_proj,0.0001013038,0.05000,4.432
16,self_attn.o_proj,0.0000473602,0.05000,1.420
16,mlp.gate_proj,0.0013676042,0.05000,2.118
16,mlp.up_proj,0.0013057378,0.05000,2.139
16,mlp.down_proj,0.0007478101,0.05000,3.826
17,self_attn.q_proj,0.0003468467,0.05000,4.276
17,self_attn.k_proj,0.0000746891,0.05000,4.327
17,self_attn.v_proj,0.0000924036,0.05000,4.352
17,self_attn.o_proj,0.0000458996,0.05000,1.441
17,mlp.gate_proj,0.0011096764,0.05000,2.045
17,mlp.up_proj,0.0010861262,0.05000,2.062
17,mlp.down_proj,0.0000963411,0.05000,3.819
18,self_attn.q_proj,0.0003796826,0.05000,4.261
18,self_attn.v_proj,0.0001097278,0.05000,4.273
18,self_attn.k_proj,0.0000885717,0.05000,4.317
18,self_attn.o_proj,0.0000499444,0.05000,1.491
18,mlp.up_proj,0.0011784257,0.05000,1.997
18,mlp.gate_proj,0.0011817602,0.05000,2.042
18,mlp.down_proj,0.0001195181,0.05000,3.730
19,self_attn.k_proj,0.0001662553,0.05000,4.191
19,self_attn.q_proj,0.0007413589,0.05000,4.338
19,self_attn.v_proj,0.0001895868,0.05000,4.369
19,self_attn.o_proj,0.0000687233,0.05000,1.466
19,mlp.gate_proj,0.0012607713,0.05000,2.092
19,mlp.up_proj,0.0012681201,0.05000,2.113
19,mlp.down_proj,0.0001470167,0.05000,3.758
20,self_attn.k_proj,0.0001367688,0.05000,4.472
20,self_attn.q_proj,0.0006558189,0.05000,4.530
20,self_attn.v_proj,0.0001652159,0.05000,4.549
20,self_attn.o_proj,0.0000642542,0.05000,1.464
20,mlp.gate_proj,0.0013226730,0.05000,2.004
20,mlp.up_proj,0.0013584104,0.05000,2.027
20,mlp.down_proj,0.0001604849,0.05000,3.849
21,self_attn.v_proj,0.0002245796,0.05000,4.389
21,self_attn.q_proj,0.0008645666,0.05000,4.438
21,self_attn.k_proj,0.0001859100,0.05000,4.476
21,self_attn.o_proj,0.0000763351,0.05000,1.458
21,mlp.up_proj,0.0016042628,0.05000,2.163
21,mlp.gate_proj,0.0015679000,0.05000,2.186
21,mlp.down_proj,0.0002100836,0.05000,3.945
22,self_attn.q_proj,0.0016993005,0.05000,4.338
22,self_attn.k_proj,0.0003806636,0.05000,4.384
22,self_attn.v_proj,0.0004654586,0.05000,4.407
22,self_attn.o_proj,0.0001504707,0.05000,1.440
22,mlp.up_proj,0.0017626628,0.05000,2.070
22,mlp.gate_proj,0.0018012372,0.05000,2.091
22,mlp.down_proj,0.0003374911,0.05000,3.859
23,self_attn.k_proj,0.0003534277,0.05000,4.390
23,self_attn.v_proj,0.0004446394,0.05000,4.453
23,self_attn.q_proj,0.0016562948,0.05000,4.478
23,self_attn.o_proj,0.0001922144,0.05000,1.483
23,mlp.gate_proj,0.0022250249,0.05000,2.107
23,mlp.up_proj,0.0020641453,0.05000,2.127
23,mlp.down_proj,0.0004860388,0.05000,3.830
24,self_attn.v_proj,0.0008083182,0.05000,4.367
24,self_attn.k_proj,0.0005927957,0.05000,4.460
24,self_attn.q_proj,0.0027235131,0.05000,4.483
24,self_attn.o_proj,0.0002267568,0.05000,1.471
24,mlp.up_proj,0.0022716492,0.05000,2.094
24,mlp.gate_proj,0.0025005769,0.05000,2.118
24,mlp.down_proj,0.0005901432,0.05000,3.820
25,self_attn.q_proj,0.0019246214,0.05000,4.292
25,self_attn.k_proj,0.0004538385,0.05000,4.343
25,self_attn.v_proj,0.0005557584,0.05000,4.381
25,self_attn.o_proj,0.0001088033,0.05000,1.481
25,mlp.up_proj,0.0026175110,0.05000,2.103
25,mlp.gate_proj,0.0029235990,0.05000,2.102
25,mlp.down_proj,0.0007230263,0.05000,3.834
26,self_attn.q_proj,0.0030830475,0.05000,4.367
26,self_attn.v_proj,0.0008666406,0.05000,4.417
26,self_attn.k_proj,0.0006896122,0.05000,4.424
26,self_attn.o_proj,0.0001164570,0.05000,1.441
26,mlp.gate_proj,0.0035159102,0.05000,2.105
26,mlp.up_proj,0.0032400480,0.05000,2.139
26,mlp.down_proj,0.0008100310,0.05000,3.738
27,self_attn.q_proj,0.0038217060,0.05000,4.317
27,self_attn.k_proj,0.0008253228,0.05000,4.359
27,self_attn.v_proj,0.0011545808,0.05000,4.379
27,self_attn.o_proj,0.0001481975,0.05000,1.495
27,mlp.gate_proj,0.0038771487,0.05000,2.060
27,mlp.up_proj,0.0036957798,0.05000,2.074
27,mlp.down_proj,0.0010563065,0.05000,3.884
28,self_attn.v_proj,0.0012145786,0.05000,4.342
28,self_attn.q_proj,0.0040962543,0.05000,4.407
28,self_attn.k_proj,0.0009609835,0.05000,4.435
28,self_attn.o_proj,0.0002459575,0.05000,1.536
28,mlp.gate_proj,0.0043499027,0.05000,2.249
28,mlp.up_proj,0.0043100494,0.05000,2.284
28,mlp.down_proj,0.0015757555,0.05000,3.881
29,self_attn.k_proj,0.0021209687,0.05000,4.298
29,self_attn.q_proj,0.0095595865,0.05000,4.304
29,self_attn.v_proj,0.0029053794,0.05000,4.363
29,self_attn.o_proj,0.0001616285,0.05000,1.479
29,mlp.up_proj,0.0052241395,0.05000,2.076
29,mlp.gate_proj,0.0050324519,0.05000,2.098
29,mlp.down_proj,0.0018942906,0.05000,3.788
30,self_attn.v_proj,0.0036343644,0.05000,4.247
30,self_attn.k_proj,0.0027200088,0.05000,4.341
30,self_attn.q_proj,0.0110439847,0.05000,4.364
30,self_attn.o_proj,0.0004099395,0.05000,1.508
30,mlp.up_proj,0.0057803822,0.05000,2.120
30,mlp.gate_proj,0.0053990015,0.05000,2.146
30,mlp.down_proj,0.0026668868,0.05000,3.796
31,self_attn.k_proj,0.0035780576,0.05000,4.280
31,self_attn.v_proj,0.0051702445,0.05000,4.344
31,self_attn.q_proj,0.0141764066,0.05000,4.371
31,self_attn.o_proj,0.0004475123,0.05000,1.467
31,mlp.gate_proj,0.0054525739,0.05000,1.951
31,mlp.up_proj,0.0061484700,0.05000,1.985
31,mlp.down_proj,0.0033983206,0.05000,3.878
32,self_attn.k_proj,0.0049337751,0.05000,4.351
32,self_attn.q_proj,0.0206758933,0.05000,4.435
32,self_attn.v_proj,0.0077025177,0.05000,4.465
32,self_attn.o_proj,0.0005332192,0.05000,1.471
32,mlp.gate_proj,0.0057657471,0.05000,2.036
32,mlp.up_proj,0.0066501302,0.05000,2.062
32,mlp.down_proj,0.0043100822,0.05000,3.855
33,self_attn.q_proj,0.0426740829,0.05000,4.337
33,self_attn.k_proj,0.0085745315,0.05000,4.387
33,self_attn.v_proj,0.0163852944,0.05000,4.405
33,self_attn.o_proj,0.0007346065,0.05000,1.459
33,mlp.up_proj,0.0073134509,0.05000,2.152
33,mlp.gate_proj,0.0062658977,0.05000,2.176
33,mlp.down_proj,0.0056324352,0.05000,3.802
34,self_attn.v_proj,0.0134164695,0.05000,4.412
34,self_attn.q_proj,0.0347782521,0.05000,4.430
34,self_attn.k_proj,0.0076354003,0.05000,4.462
34,self_attn.o_proj,0.0018579574,0.05000,1.457
34,mlp.gate_proj,0.0080567708,0.05000,2.135
34,mlp.up_proj,0.0087948156,0.05000,2.168
34,mlp.down_proj,0.0079165159,0.05000,3.861
35,self_attn.q_proj,0.0188541359,0.05000,4.320
35,self_attn.v_proj,0.0064772646,0.05000,4.414
35,self_attn.k_proj,0.0047970992,0.05000,4.437
35,self_attn.o_proj,0.0027409442,0.05000,1.486
35,mlp.up_proj,0.0270876766,0.05000,2.115
35,mlp.gate_proj,0.0218691803,0.05000,2.126
35,mlp.down_proj,0.0207865040,0.05000,3.860