File size: 10,845 Bytes
3121222
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
layer,module,loss,samples,damp,time
0,mlp.gate_proj,0.0000019575,0.05000,2.282
0,mlp.up_proj,0.0000027544,0.05000,2.295
0,mlp.down_proj,0.0000010484,0.05000,3.485
0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.133
0,self_attn.k_proj,0.0000001276,0.05000,1.896
0,self_attn.q_proj,0.0000026756,0.05000,1.900
0,self_attn.v_proj,0.0000002852,0.05000,1.926
0,self_attn.o_gate,0.0000056989,0.05000,0.416
1,mlp.up_proj,0.0000019776,0.05000,2.523
1,mlp.gate_proj,0.0000018764,0.05000,2.584
1,mlp.down_proj,0.0000005803,0.05000,3.835
1,self_attn.v_proj,0.0000274517,0.05000,3.357
1,self_attn.o_proj,0.0000006033,0.05000,3.382
1,self_attn.q_proj,0.0000212897,0.05000,3.414
1,self_attn.k_proj,0.0000216100,0.05000,3.420
2,mlp.gate_proj,0.0000023568,0.05000,2.246
2,mlp.up_proj,0.0000023672,0.05000,2.256
2,mlp.down_proj,0.0000010073,0.05000,3.446
2,self_attn.q_proj,0.0000107203,0.05000,2.823
2,self_attn.v_proj,0.0000126155,0.05000,2.848
2,self_attn.o_proj,0.0000002600,0.05000,2.851
2,self_attn.k_proj,0.0000106647,0.05000,2.854
3,mlp.up_proj,0.0000042655,0.05000,2.150
3,mlp.gate_proj,0.0000048281,0.05000,2.155
3,mlp.down_proj,0.0000014951,0.05000,3.343
3,self_attn.v_proj,0.0000143808,0.05000,2.827
3,self_attn.q_proj,0.0000133468,0.05000,2.856
3,self_attn.o_proj,0.0000003673,0.05000,2.865
3,self_attn.k_proj,0.0000136954,0.05000,2.868
4,mlp.up_proj,0.0000057488,0.05000,2.549
4,mlp.gate_proj,0.0000063297,0.05000,2.552
4,mlp.down_proj,0.0000016230,0.05000,3.892
4,self_attn.q_proj,0.0000106056,0.05000,3.481
4,self_attn.v_proj,0.0000128426,0.05000,3.494
4,self_attn.k_proj,0.0000108418,0.05000,3.509
4,self_attn.o_proj,0.0000002972,0.05000,3.519
5,mlp.up_proj,0.0000071429,0.05000,2.613
5,mlp.gate_proj,0.0000078490,0.05000,2.635
5,mlp.down_proj,0.0000017132,0.05000,3.949
5,self_attn.v_proj,0.0000129791,0.05000,3.126
5,self_attn.k_proj,0.0000121584,0.05000,3.347
5,self_attn.q_proj,0.0000115177,0.05000,3.356
5,self_attn.o_proj,0.0000004817,0.05000,3.377
6,mlp.up_proj,0.0000088197,0.05000,2.675
6,mlp.gate_proj,0.0000095654,0.05000,2.748
6,mlp.down_proj,0.0000019187,0.05000,3.957
6,self_attn.o_proj,0.0000004831,0.05000,2.738
6,self_attn.q_proj,0.0000111541,0.05000,2.795
6,self_attn.k_proj,0.0000133217,0.05000,2.824
6,self_attn.v_proj,0.0000116766,0.05000,2.834
7,mlp.up_proj,0.0000092302,0.05000,1.289
7,mlp.gate_proj,0.0000097952,0.05000,1.293
7,mlp.down_proj,0.0000022242,0.05000,2.475
7,self_attn.v_proj,0.0000109299,0.05000,2.075
7,self_attn.k_proj,0.0000121253,0.05000,2.156
7,self_attn.q_proj,0.0000099677,0.05000,2.286
7,self_attn.o_proj,0.0000005160,0.05000,2.289
8,mlp.gate_proj,0.0000104388,0.05000,1.974
8,mlp.up_proj,0.0000095090,0.05000,2.055
8,mlp.down_proj,0.0000021816,0.05000,3.211
8,self_attn.k_proj,0.0000085051,0.05000,2.614
8,self_attn.v_proj,0.0000075190,0.05000,2.743
8,self_attn.o_proj,0.0000008019,0.05000,2.750
8,self_attn.q_proj,0.0000069249,0.05000,2.772
9,mlp.gate_proj,0.0000114569,0.05000,1.817
9,mlp.up_proj,0.0000109181,0.05000,1.822
9,mlp.down_proj,0.0000028727,0.05000,3.040
9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.095
9,self_attn.v_proj,0.0000005232,0.05000,1.285
9,self_attn.k_proj,0.0000008164,0.05000,1.304
9,self_attn.q_proj,0.0000084581,0.05000,1.328
9,self_attn.o_gate,0.0000059179,0.05000,0.404
10,mlp.up_proj,0.0000110046,0.05000,1.756
10,mlp.gate_proj,0.0000113558,0.05000,1.940
10,mlp.down_proj,0.0000026231,0.05000,3.068
10,self_attn.q_proj,0.0000101732,0.05000,2.230
10,self_attn.o_proj,0.0000007549,0.05000,2.262
10,self_attn.k_proj,0.0000127262,0.05000,2.365
10,self_attn.v_proj,0.0000108463,0.05000,2.387
11,mlp.up_proj,0.0000111043,0.05000,2.183
11,mlp.gate_proj,0.0000110840,0.05000,2.187
11,mlp.down_proj,0.0000028018,0.05000,3.373
11,self_attn.v_proj,0.0000078623,0.05000,2.592
11,self_attn.k_proj,0.0000090132,0.05000,2.733
11,self_attn.o_proj,0.0000008556,0.05000,2.759
11,self_attn.q_proj,0.0000073984,0.05000,2.772
12,mlp.gate_proj,0.0000107157,0.05000,2.134
12,mlp.up_proj,0.0000109496,0.05000,2.144
12,mlp.down_proj,0.0000028660,0.05000,3.327
12,self_attn.v_proj,0.0000108852,0.05000,2.999
12,self_attn.q_proj,0.0000093560,0.05000,3.022
12,self_attn.o_proj,0.0000010866,0.05000,3.033
12,self_attn.k_proj,0.0000102958,0.05000,3.039
13,mlp.up_proj,0.0000113037,0.05000,2.134
13,mlp.gate_proj,0.0000108564,0.05000,2.150
13,mlp.down_proj,0.0000030354,0.05000,3.340
13,self_attn.o_proj,0.0000011258,0.05000,2.630
13,self_attn.q_proj,0.0000071202,0.05000,2.637
13,self_attn.v_proj,0.0000072887,0.05000,2.655
13,self_attn.k_proj,0.0000090965,0.05000,2.658
14,mlp.gate_proj,0.0000102864,0.05000,2.116
14,mlp.up_proj,0.0000112935,0.05000,2.131
14,mlp.down_proj,0.0000033973,0.05000,3.311
14,self_attn.q_proj,0.0000077342,0.05000,2.791
14,self_attn.k_proj,0.0000088883,0.05000,2.801
14,self_attn.o_proj,0.0000014352,0.05000,2.806
14,self_attn.v_proj,0.0000076372,0.05000,2.810
15,mlp.up_proj,0.0000109415,0.05000,2.113
15,mlp.gate_proj,0.0000106577,0.05000,2.117
15,mlp.down_proj,0.0000029832,0.05000,3.304
15,self_attn.v_proj,0.0000053197,0.05000,2.740
15,self_attn.k_proj,0.0000064845,0.05000,2.750
15,self_attn.q_proj,0.0000050206,0.05000,2.756
15,self_attn.o_proj,0.0000019126,0.05000,2.758
16,mlp.up_proj,0.0000153010,0.05000,2.529
16,mlp.gate_proj,0.0000156530,0.05000,2.533
16,mlp.down_proj,0.0000057073,0.05000,3.721
16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.100
16,self_attn.q_proj,0.0000078978,0.05000,1.962
16,self_attn.k_proj,0.0000007718,0.05000,1.963
16,self_attn.v_proj,0.0000004404,0.05000,1.968
16,self_attn.o_gate,0.0000046133,0.05000,0.430
17,mlp.gate_proj,0.0000172837,0.05000,2.243
17,mlp.up_proj,0.0000157615,0.05000,2.252
17,mlp.down_proj,0.0000047822,0.05000,3.442
17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.103
17,self_attn.q_proj,0.0000081782,0.05000,1.955
17,self_attn.k_proj,0.0000007155,0.05000,1.958
17,self_attn.v_proj,0.0000006458,0.05000,1.977
17,self_attn.o_gate,0.0000054807,0.05000,0.410
18,mlp.up_proj,0.0000136515,0.05000,2.710
18,mlp.gate_proj,0.0000141853,0.05000,2.718
18,mlp.down_proj,0.0000036975,0.05000,3.911
18,self_attn.q_proj,0.0000070243,0.05000,3.069
18,self_attn.k_proj,0.0000099378,0.05000,3.079
18,self_attn.v_proj,0.0000070944,0.05000,3.091
18,self_attn.o_proj,0.0000013901,0.05000,3.093
19,mlp.up_proj,0.0000140069,0.05000,2.184
19,mlp.gate_proj,0.0000137057,0.05000,2.188
19,mlp.down_proj,0.0000041433,0.05000,3.382
19,self_attn.v_proj,0.0000092466,0.05000,2.955
19,self_attn.k_proj,0.0000115108,0.05000,2.976
19,self_attn.q_proj,0.0000087993,0.05000,2.978
19,self_attn.o_proj,0.0000013312,0.05000,2.981
20,mlp.gate_proj,0.0000136300,0.05000,2.360
20,mlp.up_proj,0.0000144002,0.05000,2.367
20,mlp.down_proj,0.0000054458,0.05000,3.551
20,self_attn.k_proj,0.0000093558,0.05000,2.430
20,self_attn.v_proj,0.0000069220,0.05000,2.485
20,self_attn.q_proj,0.0000069259,0.05000,2.488
20,self_attn.o_proj,0.0000021510,0.05000,2.491
21,mlp.up_proj,0.0000136941,0.05000,1.901
21,mlp.gate_proj,0.0000127453,0.05000,2.069
21,mlp.down_proj,0.0000064656,0.05000,3.211
21,self_attn.o_proj,0.0000032435,0.05000,2.564
21,self_attn.k_proj,0.0000082270,0.05000,2.652
21,self_attn.q_proj,0.0000072292,0.05000,2.698
21,self_attn.v_proj,0.0000077253,0.05000,2.702
22,mlp.gate_proj,0.0000178435,0.05000,2.226
22,mlp.up_proj,0.0000191504,0.05000,2.237
22,mlp.down_proj,0.0000115692,0.05000,3.429
22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.105
22,self_attn.q_proj,0.0000098795,0.05000,1.564
22,self_attn.v_proj,0.0000011001,0.05000,1.603
22,self_attn.k_proj,0.0000006362,0.05000,1.802
22,self_attn.o_gate,0.0000052467,0.05000,0.556
23,mlp.up_proj,0.0000205944,0.05000,2.568
23,mlp.gate_proj,0.0000191698,0.05000,2.574
23,mlp.down_proj,0.0000114501,0.05000,3.838
23,self_attn.k_proj,0.0000123230,0.05000,3.183
23,self_attn.v_proj,0.0000092833,0.05000,3.241
23,self_attn.q_proj,0.0000096182,0.05000,3.242
23,self_attn.o_proj,0.0000029686,0.05000,3.246
24,mlp.up_proj,0.0000243043,0.05000,2.174
24,mlp.gate_proj,0.0000224462,0.05000,2.178
24,mlp.down_proj,0.0000185971,0.05000,3.367
24,self_attn.o_proj,0.0000049888,0.05000,2.975
24,self_attn.v_proj,0.0000083148,0.05000,2.986
24,self_attn.k_proj,0.0000101886,0.05000,2.996
24,self_attn.q_proj,0.0000081873,0.05000,3.004
25,mlp.gate_proj,0.0000260207,0.05000,2.247
25,mlp.up_proj,0.0000286292,0.05000,2.265
25,mlp.down_proj,0.0000327358,0.05000,3.458
25,self_attn.k_proj,0.0000109056,0.05000,3.008
25,self_attn.o_proj,0.0000064882,0.05000,3.029
25,self_attn.v_proj,0.0000084491,0.05000,3.036
25,self_attn.q_proj,0.0000085720,0.05000,3.038
26,mlp.gate_proj,0.0000290269,0.05000,2.488
26,mlp.up_proj,0.0000328542,0.05000,2.502
26,mlp.down_proj,0.0000283380,0.05000,3.690
26,self_attn.o_proj,0.0000122008,0.05000,2.982
26,self_attn.k_proj,0.0000195720,0.05000,2.992
26,self_attn.q_proj,0.0000137304,0.05000,2.997
26,self_attn.v_proj,0.0000134189,0.05000,2.999
27,mlp.up_proj,0.0000379276,0.05000,2.567
27,mlp.gate_proj,0.0000329030,0.05000,2.567
27,mlp.down_proj,0.0000369176,0.05000,3.824
27,self_attn.v_proj,0.0000120097,0.05000,2.916
27,self_attn.q_proj,0.0000128456,0.05000,2.921
27,self_attn.o_proj,0.0000076826,0.05000,2.924
27,self_attn.k_proj,0.0000141221,0.05000,2.928
28,mlp.gate_proj,0.0000373178,0.05000,2.244
28,mlp.up_proj,0.0000440524,0.05000,2.267
28,mlp.down_proj,0.0000514231,0.05000,3.471
28,self_attn.k_proj,0.0000228180,0.05000,3.043
28,self_attn.v_proj,0.0000169691,0.05000,3.054
28,self_attn.q_proj,0.0000164989,0.05000,3.059
28,self_attn.o_proj,0.0000238898,0.05000,3.062
29,mlp.up_proj,0.0000564963,0.05000,2.573
29,mlp.gate_proj,0.0000464832,0.05000,2.598
29,mlp.down_proj,0.0000851531,0.05000,3.805
29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.113
29,self_attn.v_proj,0.0000107679,0.05000,2.037
29,self_attn.q_proj,0.0000211549,0.05000,2.046
29,self_attn.k_proj,0.0000011045,0.05000,2.075
29,self_attn.o_gate,0.0000234260,0.05000,0.413
30,mlp.gate_proj,0.0000589105,0.05000,2.553
30,mlp.up_proj,0.0000711039,0.05000,2.561
30,mlp.down_proj,0.0001752567,0.05000,3.852
30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.099
30,self_attn.k_proj,0.0000012830,0.05000,2.137
30,self_attn.q_proj,0.0000324827,0.05000,2.140
30,self_attn.v_proj,0.0000334367,0.05000,2.190
30,self_attn.o_gate,0.0000373244,0.05000,0.436
31,mlp.up_proj,0.0001038903,0.05000,2.291
31,mlp.gate_proj,0.0000930895,0.05000,2.361
31,mlp.down_proj,0.0007491196,0.05000,3.548
31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.107
31,self_attn.k_proj,0.0000008259,0.05000,2.147
31,self_attn.v_proj,0.0000051840,0.05000,2.153
31,self_attn.q_proj,0.0000221746,0.05000,2.155
31,self_attn.o_gate,0.0000232037,0.05000,0.409