File size: 11,667 Bytes
0b358c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0002350870,0.05000,1.389
0,self_attn.k_proj,0.0000495884,0.05000,1.401
0,self_attn.v_proj,0.0000058943,0.05000,1.411
0,self_attn.o_proj,0.0000236358,0.05000,0.468
0,mlp.up_proj,0.0002387998,0.05000,0.884
0,mlp.gate_proj,0.0003092090,0.05000,0.905
0,mlp.down_proj,0.0000352943,0.05000,2.892
1,self_attn.q_proj,0.0000474484,0.05000,1.445
1,self_attn.k_proj,0.0000132815,0.05000,1.464
1,self_attn.v_proj,0.0000025667,0.05000,1.471
1,self_attn.o_proj,0.0000079649,0.05000,0.535
1,mlp.up_proj,0.0130337269,0.05000,1.058
1,mlp.gate_proj,0.0172095274,0.05000,1.066
1,mlp.down_proj,0.0000033589,0.05000,3.471
2,self_attn.k_proj,0.0000159378,0.05000,1.344
2,self_attn.v_proj,0.0000040993,0.05000,1.359
2,self_attn.q_proj,0.0000724677,0.05000,1.372
2,self_attn.o_proj,0.0000051665,0.05000,0.496
2,mlp.gate_proj,0.0038327214,0.05000,0.789
2,mlp.up_proj,0.0037921627,0.05000,0.796
2,mlp.down_proj,0.0090362954,0.05000,2.958
3,self_attn.v_proj,0.0000121165,0.05000,1.371
3,self_attn.k_proj,0.0000652644,0.05000,1.377
3,self_attn.q_proj,0.0003032616,0.05000,1.398
3,self_attn.o_proj,0.0000070980,0.05000,0.840
3,mlp.gate_proj,0.0050947977,0.05000,1.129
3,mlp.up_proj,0.0043798673,0.05000,1.141
3,mlp.down_proj,0.0001146523,0.05000,2.803
4,self_attn.v_proj,0.0000165725,0.05000,1.741
4,self_attn.q_proj,0.0002473793,0.05000,1.757
4,self_attn.k_proj,0.0000484598,0.05000,1.771
4,self_attn.o_proj,0.0000154044,0.05000,0.452
4,mlp.up_proj,0.0034890500,0.05000,0.945
4,mlp.gate_proj,0.0048000967,0.05000,0.968
4,mlp.down_proj,0.0000431230,0.05000,2.768
5,self_attn.v_proj,0.0000432888,0.05000,1.424
5,self_attn.q_proj,0.0005337124,0.05000,1.427
5,self_attn.k_proj,0.0000954607,0.05000,1.446
5,self_attn.o_proj,0.0000096460,0.05000,0.449
5,mlp.gate_proj,0.0064711852,0.05000,0.952
5,mlp.up_proj,0.0054382997,0.05000,0.964
5,mlp.down_proj,0.0000662426,0.05000,2.780
6,self_attn.v_proj,0.0000374054,0.05000,1.393
6,self_attn.q_proj,0.0003390273,0.05000,1.413
6,self_attn.k_proj,0.0000539993,0.05000,1.431
6,self_attn.o_proj,0.0000239811,0.05000,0.453
6,mlp.gate_proj,0.0083112013,0.05000,0.931
6,mlp.up_proj,0.0070795329,0.05000,0.940
6,mlp.down_proj,0.0000900177,0.05000,2.698
7,self_attn.k_proj,0.0000704335,0.05000,1.509
7,self_attn.q_proj,0.0004461423,0.05000,1.532
7,self_attn.v_proj,0.0000449421,0.05000,1.540
7,self_attn.o_proj,0.0000158974,0.05000,0.479
7,mlp.gate_proj,0.0084789699,0.05000,1.144
7,mlp.up_proj,0.0068957208,0.05000,1.156
7,mlp.down_proj,0.0001276712,0.05000,3.500
8,self_attn.q_proj,0.0004550622,0.05000,1.419
8,self_attn.v_proj,0.0000480385,0.05000,1.423
8,self_attn.k_proj,0.0000688795,0.05000,1.428
8,self_attn.o_proj,0.0000237095,0.05000,0.469
8,mlp.up_proj,0.0065720858,0.05000,0.953
8,mlp.gate_proj,0.0092143638,0.05000,0.957
8,mlp.down_proj,0.0001645900,0.05000,2.690
9,self_attn.q_proj,0.0004010807,0.05000,1.298
9,self_attn.k_proj,0.0000703355,0.05000,1.310
9,self_attn.v_proj,0.0000412660,0.05000,1.319
9,self_attn.o_proj,0.0000372614,0.05000,0.451
9,mlp.gate_proj,0.0086270275,0.05000,0.724
9,mlp.up_proj,0.0057881656,0.05000,0.729
9,mlp.down_proj,0.0001873366,0.05000,2.661
10,self_attn.v_proj,0.0000675373,0.05000,1.464
10,self_attn.k_proj,0.0000842096,0.05000,1.476
10,self_attn.q_proj,0.0005640181,0.05000,1.486
10,self_attn.o_proj,0.0000375405,0.05000,0.465
10,mlp.gate_proj,0.0064309352,0.05000,0.952
10,mlp.up_proj,0.0042183016,0.05000,0.952
10,mlp.down_proj,0.0002274845,0.05000,2.724
11,self_attn.k_proj,0.0000618624,0.05000,1.443
11,self_attn.v_proj,0.0000554348,0.05000,1.452
11,self_attn.q_proj,0.0004188407,0.05000,1.484
11,self_attn.o_proj,0.0000588654,0.05000,0.461
11,mlp.up_proj,0.0024528662,0.05000,0.955
11,mlp.gate_proj,0.0034099502,0.05000,0.960
11,mlp.down_proj,0.0002484049,0.05000,2.826
12,self_attn.k_proj,0.0000663459,0.05000,1.707
12,self_attn.q_proj,0.0004353355,0.05000,1.714
12,self_attn.v_proj,0.0000493771,0.05000,1.720
12,self_attn.o_proj,0.0000569917,0.05000,0.534
12,mlp.gate_proj,0.0037296684,0.05000,0.927
12,mlp.up_proj,0.0026382779,0.05000,0.932
12,mlp.down_proj,0.0002238520,0.05000,2.694
13,self_attn.k_proj,0.0000918726,0.05000,1.413
13,self_attn.v_proj,0.0000332990,0.05000,1.428
13,self_attn.q_proj,0.0005053526,0.05000,1.429
13,self_attn.o_proj,0.0000357019,0.05000,0.451
13,mlp.gate_proj,0.0020309976,0.05000,0.801
13,mlp.up_proj,0.0019490962,0.05000,0.810
13,mlp.down_proj,0.0001979468,0.05000,2.777
14,self_attn.q_proj,0.0003977691,0.05000,1.297
14,self_attn.v_proj,0.0000393797,0.05000,1.304
14,self_attn.k_proj,0.0000605854,0.05000,1.308
14,self_attn.o_proj,0.0000810008,0.05000,0.458
14,mlp.up_proj,0.0018685926,0.05000,0.907
14,mlp.gate_proj,0.0019428260,0.05000,0.913
14,mlp.down_proj,0.0001839744,0.05000,2.739
15,self_attn.k_proj,0.0000632234,0.05000,1.297
15,self_attn.v_proj,0.0000390386,0.05000,1.308
15,self_attn.q_proj,0.0004023011,0.05000,1.323
15,self_attn.o_proj,0.0000734222,0.05000,0.459
15,mlp.up_proj,0.0016407124,0.05000,0.922
15,mlp.gate_proj,0.0015691990,0.05000,0.930
15,mlp.down_proj,0.0001619132,0.05000,2.651
16,self_attn.v_proj,0.0000397472,0.05000,1.758
16,self_attn.q_proj,0.0004093029,0.05000,1.780
16,self_attn.k_proj,0.0000679986,0.05000,1.785
16,self_attn.o_proj,0.0000735932,0.05000,0.714
16,mlp.gate_proj,0.0016409640,0.05000,1.356
16,mlp.up_proj,0.0016108529,0.05000,1.364
16,mlp.down_proj,0.0001433920,0.05000,2.709
17,self_attn.q_proj,0.0006384926,0.05000,1.454
17,self_attn.v_proj,0.0000570526,0.05000,1.470
17,self_attn.k_proj,0.0001091180,0.05000,1.481
17,self_attn.o_proj,0.0000506870,0.05000,0.467
17,mlp.up_proj,0.0014302930,0.05000,0.949
17,mlp.gate_proj,0.0014094597,0.05000,0.954
17,mlp.down_proj,0.0001370377,0.05000,2.728
18,self_attn.q_proj,0.0004070264,0.05000,1.559
18,self_attn.k_proj,0.0000593264,0.05000,1.642
18,self_attn.v_proj,0.0000428977,0.05000,1.651
18,self_attn.o_proj,0.0000611151,0.05000,0.643
18,mlp.up_proj,0.0013951592,0.05000,1.321
18,mlp.gate_proj,0.0014391315,0.05000,1.320
18,mlp.down_proj,0.0001301621,0.05000,4.631
19,self_attn.v_proj,0.0000454103,0.05000,2.170
19,self_attn.k_proj,0.0000802021,0.05000,2.260
19,self_attn.q_proj,0.0004584398,0.05000,2.283
19,self_attn.o_proj,0.0000832018,0.05000,0.855
19,mlp.gate_proj,0.0013592220,0.05000,1.760
19,mlp.up_proj,0.0014207353,0.05000,1.786
19,mlp.down_proj,0.0001298065,0.05000,4.033
20,self_attn.k_proj,0.0000823069,0.05000,1.879
20,self_attn.q_proj,0.0007093750,0.05000,1.935
20,self_attn.v_proj,0.0000864942,0.05000,1.993
20,self_attn.o_proj,0.0000718808,0.05000,0.821
20,mlp.gate_proj,0.0013735232,0.05000,1.072
20,mlp.up_proj,0.0014128388,0.05000,1.215
20,mlp.down_proj,0.0001408066,0.05000,4.534
21,self_attn.k_proj,0.0000800281,0.05000,2.156
21,self_attn.q_proj,0.0005046818,0.05000,2.172
21,self_attn.v_proj,0.0000577273,0.05000,2.274
21,self_attn.o_proj,0.0000763357,0.05000,0.748
21,mlp.gate_proj,0.0014464086,0.05000,1.168
21,mlp.up_proj,0.0013651836,0.05000,1.212
21,mlp.down_proj,0.0001177456,0.05000,2.956
22,self_attn.k_proj,0.0000903085,0.05000,1.766
22,self_attn.q_proj,0.0005909076,0.05000,1.806
22,self_attn.v_proj,0.0000757894,0.05000,1.810
22,self_attn.o_proj,0.0000623799,0.05000,0.526
22,mlp.up_proj,0.0014004772,0.05000,1.257
22,mlp.gate_proj,0.0014311959,0.05000,1.275
22,mlp.down_proj,0.0001264588,0.05000,4.373
23,self_attn.q_proj,0.0006088087,0.05000,2.115
23,self_attn.k_proj,0.0000888351,0.05000,2.155
23,self_attn.v_proj,0.0000476617,0.05000,2.223
23,self_attn.o_proj,0.0000646709,0.05000,0.644
23,mlp.up_proj,0.0014787837,0.05000,1.319
23,mlp.gate_proj,0.0014157394,0.05000,1.447
23,mlp.down_proj,0.0001453594,0.05000,4.296
24,self_attn.v_proj,0.0000535015,0.05000,1.915
24,self_attn.k_proj,0.0000606524,0.05000,1.972
24,self_attn.q_proj,0.0004475276,0.05000,1.990
24,self_attn.o_proj,0.0000783631,0.05000,0.887
24,mlp.gate_proj,0.0013949360,0.05000,1.192
24,mlp.up_proj,0.0014352342,0.05000,1.383
24,mlp.down_proj,0.0001567249,0.05000,4.689
25,self_attn.k_proj,0.0000588555,0.05000,2.278
25,self_attn.v_proj,0.0000853784,0.05000,2.371
25,self_attn.q_proj,0.0006110521,0.05000,2.532
25,self_attn.o_proj,0.0000598715,0.05000,0.982
25,mlp.up_proj,0.0015592789,0.05000,1.336
25,mlp.gate_proj,0.0015332821,0.05000,1.365
25,mlp.down_proj,0.0001992039,0.05000,3.003
26,self_attn.q_proj,0.0004816819,0.05000,1.612
26,self_attn.v_proj,0.0000801456,0.05000,1.647
26,self_attn.k_proj,0.0000613980,0.05000,1.656
26,self_attn.o_proj,0.0000884467,0.05000,0.513
26,mlp.gate_proj,0.0016698752,0.05000,1.040
26,mlp.up_proj,0.0017734253,0.05000,1.054
26,mlp.down_proj,0.0002655405,0.05000,3.050
27,self_attn.v_proj,0.0001156350,0.05000,1.534
27,self_attn.k_proj,0.0000694705,0.05000,1.571
27,self_attn.q_proj,0.0008163267,0.05000,1.579
27,self_attn.o_proj,0.0001265619,0.05000,0.511
27,mlp.up_proj,0.0018934087,0.05000,0.999
27,mlp.gate_proj,0.0018705411,0.05000,1.017
27,mlp.down_proj,0.0003228687,0.05000,2.862
28,self_attn.k_proj,0.0000650905,0.05000,2.290
28,self_attn.q_proj,0.0006038837,0.05000,2.303
28,self_attn.v_proj,0.0001092675,0.05000,2.350
28,self_attn.o_proj,0.0001192229,0.05000,0.755
28,mlp.up_proj,0.0021129013,0.05000,1.005
28,mlp.gate_proj,0.0021004773,0.05000,1.026
28,mlp.down_proj,0.0003793565,0.05000,2.973
29,self_attn.q_proj,0.0005846746,0.05000,1.838
29,self_attn.k_proj,0.0000670443,0.05000,1.869
29,self_attn.v_proj,0.0000986339,0.05000,1.879
29,self_attn.o_proj,0.0000909046,0.05000,0.470
29,mlp.up_proj,0.0025685690,0.05000,1.072
29,mlp.gate_proj,0.0025046702,0.05000,1.101
29,mlp.down_proj,0.0005272932,0.05000,3.049
30,self_attn.v_proj,0.0002365673,0.05000,1.805
30,self_attn.q_proj,0.0008247248,0.05000,1.814
30,self_attn.k_proj,0.0000707403,0.05000,1.832
30,self_attn.o_proj,0.0000990036,0.05000,0.519
30,mlp.up_proj,0.0035084721,0.05000,0.990
30,mlp.gate_proj,0.0032103522,0.05000,1.040
30,mlp.down_proj,0.0053239912,0.05000,2.915
31,self_attn.q_proj,0.0009805325,0.05000,1.691
31,self_attn.v_proj,0.0002592109,0.05000,1.704
31,self_attn.k_proj,0.0000985458,0.05000,1.714
31,self_attn.o_proj,0.0002562517,0.05000,0.501
31,mlp.up_proj,0.0041799812,0.05000,1.191
31,mlp.gate_proj,0.0036576450,0.05000,1.214
31,mlp.down_proj,0.0017657173,0.05000,4.082
32,self_attn.v_proj,0.0007043787,0.05000,2.017
32,self_attn.k_proj,0.0001399740,0.05000,2.077
32,self_attn.q_proj,0.0015637881,0.05000,2.086
32,self_attn.o_proj,0.0002418083,0.05000,0.756
32,mlp.up_proj,0.0043462683,0.05000,1.288
32,mlp.gate_proj,0.0038090997,0.05000,1.308
32,mlp.down_proj,0.0018351650,0.05000,4.499
33,self_attn.v_proj,0.0015008834,0.05000,2.409
33,self_attn.k_proj,0.0001224033,0.05000,2.454
33,self_attn.q_proj,0.0014767564,0.05000,2.478
33,self_attn.o_proj,0.0002690905,0.05000,0.725
33,mlp.gate_proj,0.0035885438,0.05000,1.361
33,mlp.up_proj,0.0044905547,0.05000,1.399
33,mlp.down_proj,0.0034433876,0.05000,4.359
34,self_attn.v_proj,0.0002083482,0.05000,2.172
34,self_attn.q_proj,0.0006153963,0.05000,2.220
34,self_attn.k_proj,0.0000626373,0.05000,2.271
34,self_attn.o_proj,0.0002969657,0.05000,0.690
34,mlp.gate_proj,0.0039339582,0.05000,1.359
34,mlp.up_proj,0.0045975191,0.05000,1.368
34,mlp.down_proj,0.0025222073,0.05000,3.361
35,self_attn.v_proj,0.0001811058,0.05000,1.607
35,self_attn.k_proj,0.0000604469,0.05000,1.616
35,self_attn.q_proj,0.0005729022,0.05000,1.642
35,self_attn.o_proj,0.0002992628,0.05000,0.602
35,mlp.up_proj,0.0064264687,0.05000,1.194
35,mlp.gate_proj,0.0060764383,0.05000,1.285
35,mlp.down_proj,0.0061064307,0.05000,3.911