File size: 11,667 Bytes
d4d173b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0000003401,0.05000,2.345
0,self_attn.k_proj,0.0000000715,0.05000,2.364
0,self_attn.v_proj,0.0000000085,0.05000,2.368
0,self_attn.o_proj,0.0000000277,0.05000,0.771
0,mlp.gate_proj,0.0000003923,0.05000,1.354
0,mlp.up_proj,0.0000003028,0.05000,1.358
0,mlp.down_proj,0.0000000544,0.05000,4.481
1,self_attn.k_proj,0.0000000173,0.05000,2.555
1,self_attn.q_proj,0.0000000622,0.05000,2.587
1,self_attn.v_proj,0.0000000033,0.05000,2.622
1,self_attn.o_proj,0.0000000095,0.05000,0.750
1,mlp.gate_proj,0.0000222209,0.05000,1.368
1,mlp.up_proj,0.0000169856,0.05000,1.377
1,mlp.down_proj,0.0000000056,0.05000,4.306
2,self_attn.q_proj,0.0000000956,0.05000,2.530
2,self_attn.k_proj,0.0000000210,0.05000,2.544
2,self_attn.v_proj,0.0000000054,0.05000,2.568
2,self_attn.o_proj,0.0000000061,0.05000,0.756
2,mlp.gate_proj,0.0000048486,0.05000,1.414
2,mlp.up_proj,0.0000049113,0.05000,1.432
2,mlp.down_proj,0.0000085108,0.05000,4.318
3,self_attn.q_proj,0.0000004073,0.05000,2.616
3,self_attn.k_proj,0.0000000878,0.05000,2.670
3,self_attn.v_proj,0.0000000161,0.05000,2.678
3,self_attn.o_proj,0.0000000094,0.05000,0.782
3,mlp.up_proj,0.0000058005,0.05000,1.375
3,mlp.gate_proj,0.0000064317,0.05000,1.390
3,mlp.down_proj,0.0000001039,0.05000,4.202
4,self_attn.q_proj,0.0000003292,0.05000,2.495
4,self_attn.v_proj,0.0000000221,0.05000,2.539
4,self_attn.k_proj,0.0000000643,0.05000,2.549
4,self_attn.o_proj,0.0000000181,0.05000,0.765
4,mlp.gate_proj,0.0000060889,0.05000,1.356
4,mlp.up_proj,0.0000046043,0.05000,1.358
4,mlp.down_proj,0.0000000563,0.05000,4.267
5,self_attn.k_proj,0.0000001253,0.05000,2.652
5,self_attn.v_proj,0.0000000582,0.05000,2.677
5,self_attn.q_proj,0.0000007062,0.05000,2.693
5,self_attn.o_proj,0.0000000114,0.05000,0.745
5,mlp.gate_proj,0.0000084792,0.05000,1.395
5,mlp.up_proj,0.0000071883,0.05000,1.400
5,mlp.down_proj,0.0000001014,0.05000,4.247
6,self_attn.k_proj,0.0000000723,0.05000,2.712
6,self_attn.q_proj,0.0000004542,0.05000,2.727
6,self_attn.v_proj,0.0000000503,0.05000,2.746
6,self_attn.o_proj,0.0000000305,0.05000,0.753
6,mlp.gate_proj,0.0000109521,0.05000,1.381
6,mlp.up_proj,0.0000094474,0.05000,1.390
6,mlp.down_proj,0.0000001409,0.05000,4.240
7,self_attn.q_proj,0.0000006046,0.05000,2.634
7,self_attn.k_proj,0.0000000955,0.05000,2.698
7,self_attn.v_proj,0.0000000611,0.05000,2.720
7,self_attn.o_proj,0.0000000201,0.05000,0.758
7,mlp.gate_proj,0.0000111783,0.05000,1.347
7,mlp.up_proj,0.0000092888,0.05000,1.354
7,mlp.down_proj,0.0000001957,0.05000,4.267
8,self_attn.q_proj,0.0000006208,0.05000,2.460
8,self_attn.k_proj,0.0000000936,0.05000,2.473
8,self_attn.v_proj,0.0000000658,0.05000,2.481
8,self_attn.o_proj,0.0000000297,0.05000,0.755
8,mlp.up_proj,0.0000088363,0.05000,1.354
8,mlp.gate_proj,0.0000123492,0.05000,1.358
8,mlp.down_proj,0.0000002615,0.05000,4.249
9,self_attn.q_proj,0.0000005419,0.05000,2.480
9,self_attn.k_proj,0.0000000949,0.05000,2.487
9,self_attn.v_proj,0.0000000561,0.05000,2.517
9,self_attn.o_proj,0.0000000487,0.05000,0.757
9,mlp.gate_proj,0.0000116062,0.05000,1.368
9,mlp.up_proj,0.0000078157,0.05000,1.375
9,mlp.down_proj,0.0000003051,0.05000,4.267
10,self_attn.q_proj,0.0000007660,0.05000,2.545
10,self_attn.v_proj,0.0000000931,0.05000,2.556
10,self_attn.k_proj,0.0000001141,0.05000,2.560
10,self_attn.o_proj,0.0000000494,0.05000,0.749
10,mlp.gate_proj,0.0000087138,0.05000,1.360
10,mlp.up_proj,0.0000057507,0.05000,1.366
10,mlp.down_proj,0.0000003635,0.05000,4.293
11,self_attn.q_proj,0.0000005695,0.05000,2.428
11,self_attn.v_proj,0.0000000758,0.05000,2.446
11,self_attn.k_proj,0.0000000838,0.05000,2.454
11,self_attn.o_proj,0.0000000737,0.05000,0.767
11,mlp.gate_proj,0.0000047200,0.05000,1.349
11,mlp.up_proj,0.0000033895,0.05000,1.357
11,mlp.down_proj,0.0000003966,0.05000,4.351
12,self_attn.k_proj,0.0000000907,0.05000,2.451
12,self_attn.v_proj,0.0000000689,0.05000,2.479
12,self_attn.q_proj,0.0000005998,0.05000,2.492
12,self_attn.o_proj,0.0000000781,0.05000,0.750
12,mlp.gate_proj,0.0000052030,0.05000,1.376
12,mlp.up_proj,0.0000036776,0.05000,1.383
12,mlp.down_proj,0.0000003800,0.05000,4.308
13,self_attn.k_proj,0.0000361445,0.05000,2.567
13,self_attn.q_proj,0.0001988869,0.05000,2.576
13,self_attn.v_proj,0.0000131335,0.05000,2.590
13,self_attn.o_proj,0.0000135128,0.05000,0.761
13,mlp.up_proj,0.0007943917,0.05000,1.410
13,mlp.gate_proj,0.0008288599,0.05000,1.423
13,mlp.down_proj,0.0000983028,0.05000,4.245
14,self_attn.k_proj,0.0000000836,0.05000,2.513
14,self_attn.v_proj,0.0000000555,0.05000,2.535
14,self_attn.q_proj,0.0000005526,0.05000,2.542
14,self_attn.o_proj,0.0000001077,0.05000,0.758
14,mlp.gate_proj,0.0000027660,0.05000,1.381
14,mlp.up_proj,0.0000026624,0.05000,1.387
14,mlp.down_proj,0.0000003076,0.05000,4.313
15,self_attn.v_proj,0.0000000538,0.05000,2.525
15,self_attn.k_proj,0.0000000867,0.05000,2.572
15,self_attn.q_proj,0.0000005570,0.05000,2.578
15,self_attn.o_proj,0.0000000963,0.05000,0.750
15,mlp.up_proj,0.0000023406,0.05000,1.377
15,mlp.gate_proj,0.0000022434,0.05000,1.380
15,mlp.down_proj,0.0000002715,0.05000,4.259
16,self_attn.q_proj,0.0001674280,0.05000,2.427
16,self_attn.v_proj,0.0000162792,0.05000,2.520
16,self_attn.k_proj,0.0000277001,0.05000,2.537
16,self_attn.o_proj,0.0000293571,0.05000,0.758
16,mlp.up_proj,0.0006605779,0.05000,1.385
16,mlp.gate_proj,0.0006744062,0.05000,1.399
16,mlp.down_proj,0.0000691259,0.05000,4.257
17,self_attn.k_proj,0.0000001538,0.05000,2.463
17,self_attn.q_proj,0.0000009075,0.05000,2.471
17,self_attn.v_proj,0.0000000858,0.05000,2.469
17,self_attn.o_proj,0.0000000672,0.05000,0.773
17,mlp.up_proj,0.0000020540,0.05000,1.401
17,mlp.gate_proj,0.0000020261,0.05000,1.408
17,mlp.down_proj,0.0000002283,0.05000,4.257
18,self_attn.v_proj,0.0000000613,0.05000,2.509
18,self_attn.k_proj,0.0000000836,0.05000,2.535
18,self_attn.q_proj,0.0000005755,0.05000,2.543
18,self_attn.o_proj,0.0000000861,0.05000,0.763
18,mlp.gate_proj,0.0000020542,0.05000,1.371
18,mlp.up_proj,0.0000019867,0.05000,1.384
18,mlp.down_proj,0.0000002142,0.05000,4.281
19,self_attn.v_proj,0.0000000633,0.05000,2.534
19,self_attn.k_proj,0.0000001105,0.05000,2.549
19,self_attn.q_proj,0.0000006358,0.05000,2.559
19,self_attn.o_proj,0.0000001115,0.05000,0.778
19,mlp.gate_proj,0.0000019188,0.05000,1.345
19,mlp.up_proj,0.0000020043,0.05000,1.350
19,mlp.down_proj,0.0000002130,0.05000,4.248
20,self_attn.v_proj,0.0000001238,0.05000,2.480
20,self_attn.q_proj,0.0000009841,0.05000,2.509
20,self_attn.k_proj,0.0000001128,0.05000,2.515
20,self_attn.o_proj,0.0000000938,0.05000,0.753
20,mlp.gate_proj,0.0000019638,0.05000,1.350
20,mlp.up_proj,0.0000019977,0.05000,1.360
20,mlp.down_proj,0.0000002298,0.05000,4.285
21,self_attn.v_proj,0.0000000831,0.05000,2.684
21,self_attn.k_proj,0.0000001138,0.05000,2.703
21,self_attn.q_proj,0.0000007205,0.05000,2.715
21,self_attn.o_proj,0.0000001049,0.05000,0.765
21,mlp.up_proj,0.0000019671,0.05000,1.369
21,mlp.gate_proj,0.0000021018,0.05000,1.374
21,mlp.down_proj,0.0000002018,0.05000,4.270
22,self_attn.v_proj,0.0000001127,0.05000,2.530
22,self_attn.q_proj,0.0000008507,0.05000,2.575
22,self_attn.k_proj,0.0000001312,0.05000,2.581
22,self_attn.o_proj,0.0000000927,0.05000,0.761
22,mlp.gate_proj,0.0000021110,0.05000,1.372
22,mlp.up_proj,0.0000020616,0.05000,1.376
22,mlp.down_proj,0.0000002235,0.05000,4.254
23,self_attn.v_proj,0.0000000670,0.05000,2.514
23,self_attn.k_proj,0.0000001256,0.05000,2.541
23,self_attn.q_proj,0.0000008720,0.05000,2.550
23,self_attn.o_proj,0.0000000902,0.05000,0.759
23,mlp.gate_proj,0.0000021169,0.05000,1.350
23,mlp.up_proj,0.0000022067,0.05000,1.356
23,mlp.down_proj,0.0000002553,0.05000,4.296
24,self_attn.k_proj,0.0000255149,0.05000,2.405
24,self_attn.v_proj,0.0000224293,0.05000,2.434
24,self_attn.q_proj,0.0001882081,0.05000,2.442
24,self_attn.o_proj,0.0000325786,0.05000,0.769
24,mlp.up_proj,0.0006212120,0.05000,1.378
24,mlp.gate_proj,0.0006047986,0.05000,1.390
24,mlp.down_proj,0.0000792528,0.05000,4.284
25,self_attn.v_proj,0.0000001261,0.05000,2.402
25,self_attn.k_proj,0.0000000865,0.05000,2.413
25,self_attn.q_proj,0.0000009028,0.05000,2.425
25,self_attn.o_proj,0.0000000856,0.05000,0.752
25,mlp.gate_proj,0.0000023320,0.05000,1.363
25,mlp.up_proj,0.0000023642,0.05000,1.368
25,mlp.down_proj,0.0000003521,0.05000,4.316
26,self_attn.v_proj,0.0000345133,0.05000,2.484
26,self_attn.q_proj,0.0002079831,0.05000,2.532
26,self_attn.k_proj,0.0000266343,0.05000,2.539
26,self_attn.o_proj,0.0000372935,0.05000,0.759
26,mlp.up_proj,0.0007854277,0.05000,1.365
26,mlp.gate_proj,0.0007399612,0.05000,1.366
26,mlp.down_proj,0.0001390844,0.05000,4.290
27,self_attn.q_proj,0.0000012426,0.05000,2.442
27,self_attn.k_proj,0.0000001063,0.05000,2.455
27,self_attn.v_proj,0.0000001755,0.05000,2.464
27,self_attn.o_proj,0.0000001954,0.05000,0.743
27,mlp.gate_proj,0.0000029087,0.05000,1.346
27,mlp.up_proj,0.0000029363,0.05000,1.359
27,mlp.down_proj,0.0000006022,0.05000,4.340
28,self_attn.k_proj,0.0000283726,0.05000,2.394
28,self_attn.v_proj,0.0000471470,0.05000,2.405
28,self_attn.q_proj,0.0002638888,0.05000,2.417
28,self_attn.o_proj,0.0000527611,0.05000,0.737
28,mlp.gate_proj,0.0009632122,0.05000,1.353
28,mlp.up_proj,0.0009681229,0.05000,1.360
28,mlp.down_proj,0.0002143012,0.05000,4.288
29,self_attn.v_proj,0.0000444782,0.05000,2.417
29,self_attn.k_proj,0.0000306777,0.05000,2.451
29,self_attn.q_proj,0.0002660261,0.05000,2.460
29,self_attn.o_proj,0.0000419030,0.05000,0.753
29,mlp.gate_proj,0.0011661545,0.05000,1.349
29,mlp.up_proj,0.0011946059,0.05000,1.355
29,mlp.down_proj,0.0002983677,0.05000,4.321
30,self_attn.v_proj,0.0000003594,0.05000,2.520
30,self_attn.k_proj,0.0000001052,0.05000,2.556
30,self_attn.q_proj,0.0000012281,0.05000,2.563
30,self_attn.o_proj,0.0000001564,0.05000,0.763
30,mlp.gate_proj,0.0000051266,0.05000,1.384
30,mlp.up_proj,0.0000055988,0.05000,1.389
30,mlp.down_proj,0.0000080552,0.05000,4.288
31,self_attn.q_proj,0.0000014235,0.05000,2.486
31,self_attn.k_proj,0.0000001413,0.05000,2.555
31,self_attn.v_proj,0.0000003790,0.05000,2.578
31,self_attn.o_proj,0.0000003545,0.05000,0.753
31,mlp.gate_proj,0.0000059219,0.05000,1.385
31,mlp.up_proj,0.0000067765,0.05000,1.389
31,mlp.down_proj,0.0000033691,0.05000,4.295
32,self_attn.q_proj,0.0000022495,0.05000,2.460
32,self_attn.k_proj,0.0000001965,0.05000,2.467
32,self_attn.v_proj,0.0000009772,0.05000,2.481
32,self_attn.o_proj,0.0000003497,0.05000,0.767
32,mlp.gate_proj,0.0000061569,0.05000,1.398
32,mlp.up_proj,0.0000070279,0.05000,1.404
32,mlp.down_proj,0.0000036677,0.05000,4.273
33,self_attn.k_proj,0.0000001776,0.05000,2.578
33,self_attn.v_proj,0.0000021733,0.05000,2.592
33,self_attn.q_proj,0.0000022470,0.05000,2.597
33,self_attn.o_proj,0.0000004441,0.05000,0.758
33,mlp.gate_proj,0.0000059196,0.05000,1.358
33,mlp.up_proj,0.0000074468,0.05000,1.363
33,mlp.down_proj,0.0000068785,0.05000,4.258
34,self_attn.k_proj,0.0000001017,0.05000,2.520
34,self_attn.q_proj,0.0000009964,0.05000,2.541
34,self_attn.v_proj,0.0000003421,0.05000,2.558
34,self_attn.o_proj,0.0000004992,0.05000,0.785
34,mlp.up_proj,0.0000077734,0.05000,1.347
34,mlp.gate_proj,0.0000066457,0.05000,1.352
34,mlp.down_proj,0.0000052847,0.05000,4.240
35,self_attn.k_proj,0.0000001003,0.05000,2.693
35,self_attn.v_proj,0.0000003007,0.05000,2.721
35,self_attn.q_proj,0.0000009509,0.05000,2.724
35,self_attn.o_proj,0.0000005282,0.05000,0.765
35,mlp.gate_proj,0.0000101811,0.05000,1.361
35,mlp.up_proj,0.0000107798,0.05000,1.373
35,mlp.down_proj,0.0000105316,0.05000,4.297