File size: 11,667 Bytes
f5a0f3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,samples,damp,time
0,self_attn.v_proj,0.0000000372,0.05000,4.540
0,self_attn.q_proj,0.0000001598,0.05000,4.596
0,self_attn.k_proj,0.0000000433,0.05000,4.618
0,self_attn.o_proj,0.0000000962,0.05000,1.349
0,mlp.gate_proj,0.0000104463,0.05000,1.920
0,mlp.up_proj,0.0000091199,0.05000,1.936
0,mlp.down_proj,0.0000010827,0.05000,3.513
1,self_attn.k_proj,0.0000000738,0.05000,4.644
1,self_attn.v_proj,0.0000000748,0.05000,4.684
1,self_attn.q_proj,0.0000002716,0.05000,4.696
1,self_attn.o_proj,0.0000001209,0.05000,1.631
1,mlp.up_proj,0.0001685409,0.05000,2.393
1,mlp.gate_proj,0.0003610970,0.05000,2.439
1,mlp.down_proj,0.0000016096,0.05000,3.772
2,self_attn.q_proj,0.0000007576,0.05000,4.689
2,self_attn.k_proj,0.0000002197,0.05000,4.747
2,self_attn.v_proj,0.0000002069,0.05000,4.778
2,self_attn.o_proj,0.0000001543,0.05000,1.372
2,mlp.gate_proj,0.0006330384,0.05000,1.902
2,mlp.up_proj,0.0005052607,0.05000,1.925
2,mlp.down_proj,0.0000010256,0.05000,3.669
3,self_attn.v_proj,0.0000003695,0.05000,3.944
3,self_attn.q_proj,0.0000014327,0.05000,3.994
3,self_attn.k_proj,0.0000003736,0.05000,4.033
3,self_attn.o_proj,0.0000002812,0.05000,1.402
3,mlp.gate_proj,0.0005489127,0.05000,1.989
3,mlp.up_proj,0.0002831875,0.05000,2.018
3,mlp.down_proj,0.0000025971,0.05000,3.661
4,self_attn.v_proj,0.0000007139,0.05000,4.002
4,self_attn.q_proj,0.0000026597,0.05000,4.038
4,self_attn.k_proj,0.0000007332,0.05000,4.080
4,self_attn.o_proj,0.0000004123,0.05000,1.373
4,mlp.gate_proj,0.0004671095,0.05000,1.857
4,mlp.up_proj,0.0001758254,0.05000,1.876
4,mlp.down_proj,0.0000032607,0.05000,3.646
5,self_attn.v_proj,0.0000007750,0.05000,4.157
5,self_attn.q_proj,0.0000028777,0.05000,4.259
5,self_attn.k_proj,0.0000007445,0.05000,4.266
5,self_attn.o_proj,0.0000007169,0.05000,1.398
5,mlp.up_proj,0.0000758844,0.05000,1.903
5,mlp.gate_proj,0.0001681334,0.05000,1.902
5,mlp.down_proj,0.0000047785,0.05000,3.754
6,self_attn.v_proj,0.0000016682,0.05000,4.165
6,self_attn.q_proj,0.0000062852,0.05000,4.216
6,self_attn.k_proj,0.0000015054,0.05000,4.229
6,self_attn.o_proj,0.0000016241,0.05000,1.420
6,mlp.up_proj,0.0000998887,0.05000,1.973
6,mlp.gate_proj,0.0001912913,0.05000,1.975
6,mlp.down_proj,0.0001258073,0.05000,3.691
7,self_attn.q_proj,0.0000131708,0.05000,4.055
7,self_attn.v_proj,0.0000034307,0.05000,4.095
7,self_attn.k_proj,0.0000036401,0.05000,4.149
7,self_attn.o_proj,0.0000020525,0.05000,1.392
7,mlp.up_proj,0.0001153550,0.05000,1.908
7,mlp.gate_proj,0.0002064913,0.05000,1.924
7,mlp.down_proj,0.0000085862,0.05000,3.667
8,self_attn.v_proj,0.0000052083,0.05000,4.059
8,self_attn.q_proj,0.0000193134,0.05000,4.104
8,self_attn.k_proj,0.0000050779,0.05000,4.123
8,self_attn.o_proj,0.0000026441,0.05000,1.434
8,mlp.up_proj,0.0001105051,0.05000,1.911
8,mlp.gate_proj,0.0001367360,0.05000,1.933
8,mlp.down_proj,0.0000123248,0.05000,3.637
9,self_attn.q_proj,0.0000244685,0.05000,3.993
9,self_attn.v_proj,0.0000062576,0.05000,4.090
9,self_attn.k_proj,0.0000071317,0.05000,4.112
9,self_attn.o_proj,0.0000033297,0.05000,1.387
9,mlp.up_proj,0.0001365071,0.05000,2.040
9,mlp.gate_proj,0.0002204533,0.05000,2.060
9,mlp.down_proj,0.0000117399,0.05000,3.677
10,self_attn.k_proj,0.0000091113,0.05000,4.005
10,self_attn.v_proj,0.0000097468,0.05000,4.016
10,self_attn.q_proj,0.0000349093,0.05000,4.072
10,self_attn.o_proj,0.0000044036,0.05000,1.447
10,mlp.gate_proj,0.0001647009,0.05000,1.997
10,mlp.up_proj,0.0001119096,0.05000,2.002
10,mlp.down_proj,0.0000092788,0.05000,3.657
11,self_attn.k_proj,0.0000046750,0.05000,4.090
11,self_attn.v_proj,0.0000043463,0.05000,4.134
11,self_attn.q_proj,0.0000167216,0.05000,4.149
11,self_attn.o_proj,0.0000024583,0.05000,1.430
11,mlp.gate_proj,0.0001339980,0.05000,1.945
11,mlp.up_proj,0.0001037463,0.05000,1.967
11,mlp.down_proj,0.0000093708,0.05000,3.644
12,self_attn.v_proj,0.0000054699,0.05000,4.053
12,self_attn.k_proj,0.0000055857,0.05000,4.050
12,self_attn.q_proj,0.0000204089,0.05000,4.070
12,self_attn.o_proj,0.0000035075,0.05000,1.391
12,mlp.up_proj,0.0001045358,0.05000,2.000
12,mlp.gate_proj,0.0001222810,0.05000,2.022
12,mlp.down_proj,0.0000097573,0.05000,3.707
13,self_attn.k_proj,0.0000043581,0.05000,3.889
13,self_attn.q_proj,0.0000159966,0.05000,4.022
13,self_attn.v_proj,0.0000038494,0.05000,4.046
13,self_attn.o_proj,0.0000029048,0.05000,1.381
13,mlp.up_proj,0.0001091306,0.05000,1.877
13,mlp.gate_proj,0.0001162593,0.05000,1.907
13,mlp.down_proj,0.0000108037,0.05000,3.634
14,self_attn.q_proj,0.0000260713,0.05000,3.995
14,self_attn.k_proj,0.0000068275,0.05000,4.027
14,self_attn.v_proj,0.0000067319,0.05000,4.065
14,self_attn.o_proj,0.0000044881,0.05000,1.401
14,mlp.up_proj,0.0001112787,0.05000,1.968
14,mlp.gate_proj,0.0001179207,0.05000,1.966
14,mlp.down_proj,0.0000101188,0.05000,3.620
15,self_attn.v_proj,0.0000062567,0.05000,4.044
15,self_attn.q_proj,0.0000269484,0.05000,4.059
15,self_attn.k_proj,0.0000071068,0.05000,4.097
15,self_attn.o_proj,0.0000038186,0.05000,1.407
15,mlp.gate_proj,0.0001098568,0.05000,1.904
15,mlp.up_proj,0.0001071991,0.05000,1.925
15,mlp.down_proj,0.0000096914,0.05000,3.681
16,self_attn.v_proj,0.0000123046,0.05000,3.948
16,self_attn.k_proj,0.0000116075,0.05000,4.038
16,self_attn.q_proj,0.0000466369,0.05000,4.046
16,self_attn.o_proj,0.0000044731,0.05000,1.404
16,mlp.up_proj,0.0001113207,0.05000,1.917
16,mlp.gate_proj,0.0001211038,0.05000,1.924
16,mlp.down_proj,0.0000872585,0.05000,3.648
17,self_attn.k_proj,0.0000090312,0.05000,4.024
17,self_attn.v_proj,0.0000093076,0.05000,4.073
17,self_attn.q_proj,0.0000403532,0.05000,4.105
17,self_attn.o_proj,0.0000043949,0.05000,1.497
17,mlp.up_proj,0.0000990942,0.05000,1.850
17,mlp.gate_proj,0.0001068454,0.05000,1.870
17,mlp.down_proj,0.0000089522,0.05000,3.610
18,self_attn.k_proj,0.0000108056,0.05000,4.046
18,self_attn.q_proj,0.0000436995,0.05000,4.096
18,self_attn.v_proj,0.0000111328,0.05000,4.135
18,self_attn.o_proj,0.0000057354,0.05000,1.378
18,mlp.up_proj,0.0001083605,0.05000,1.924
18,mlp.gate_proj,0.0001144615,0.05000,1.945
18,mlp.down_proj,0.0000113469,0.05000,3.680
19,self_attn.v_proj,0.0000194956,0.05000,4.021
19,self_attn.k_proj,0.0000207048,0.05000,4.103
19,self_attn.q_proj,0.0000867269,0.05000,4.125
19,self_attn.o_proj,0.0000082300,0.05000,1.408
19,mlp.up_proj,0.0001204733,0.05000,1.885
19,mlp.gate_proj,0.0001256971,0.05000,1.883
19,mlp.down_proj,0.0000149575,0.05000,3.666
20,self_attn.v_proj,0.0000181873,0.05000,3.973
20,self_attn.k_proj,0.0000184160,0.05000,4.048
20,self_attn.q_proj,0.0000832375,0.05000,4.065
20,self_attn.o_proj,0.0000077851,0.05000,1.456
20,mlp.gate_proj,0.0001373728,0.05000,1.933
20,mlp.up_proj,0.0001342345,0.05000,1.953
20,mlp.down_proj,0.0000174853,0.05000,3.660
21,self_attn.v_proj,0.0000244685,0.05000,3.980
21,self_attn.k_proj,0.0000240417,0.05000,4.051
21,self_attn.q_proj,0.0001041426,0.05000,4.056
21,self_attn.o_proj,0.0000098543,0.05000,1.411
21,mlp.up_proj,0.0001608356,0.05000,1.880
21,mlp.gate_proj,0.0001651234,0.05000,1.899
21,mlp.down_proj,0.0000237775,0.05000,3.748
22,self_attn.q_proj,0.0002165873,0.05000,4.104
22,self_attn.v_proj,0.0000505528,0.05000,4.155
22,self_attn.k_proj,0.0000500092,0.05000,4.182
22,self_attn.o_proj,0.0000204702,0.05000,1.381
22,mlp.up_proj,0.0001821093,0.05000,1.927
22,mlp.gate_proj,0.0001975882,0.05000,1.943
22,mlp.down_proj,0.0000375069,0.05000,3.653
23,self_attn.q_proj,0.0002052678,0.05000,4.067
23,self_attn.k_proj,0.0000481465,0.05000,4.102
23,self_attn.v_proj,0.0000507366,0.05000,4.123
23,self_attn.o_proj,0.0000210706,0.05000,1.465
23,mlp.up_proj,0.0002144172,0.05000,1.990
23,mlp.gate_proj,0.0002453108,0.05000,2.009
23,mlp.down_proj,0.0000515478,0.05000,3.654
24,self_attn.k_proj,0.0000796690,0.05000,4.027
24,self_attn.v_proj,0.0000970791,0.05000,4.076
24,self_attn.q_proj,0.0003320909,0.05000,4.100
24,self_attn.o_proj,0.0000288356,0.05000,1.429
24,mlp.gate_proj,0.0002778602,0.05000,1.944
24,mlp.up_proj,0.0002385447,0.05000,1.965
24,mlp.down_proj,0.0000658266,0.05000,3.663
25,self_attn.q_proj,0.0002312621,0.05000,3.981
25,self_attn.v_proj,0.0000614569,0.05000,4.023
25,self_attn.k_proj,0.0000580671,0.05000,4.076
25,self_attn.o_proj,0.0000149890,0.05000,1.393
25,mlp.up_proj,0.0002772560,0.05000,1.910
25,mlp.gate_proj,0.0003277600,0.05000,1.930
25,mlp.down_proj,0.0000812657,0.05000,3.608
26,self_attn.k_proj,0.0000911344,0.05000,3.968
26,self_attn.v_proj,0.0001004724,0.05000,3.987
26,self_attn.q_proj,0.0003837158,0.05000,4.036
26,self_attn.o_proj,0.0000154729,0.05000,1.449
26,mlp.up_proj,0.0003514904,0.05000,1.952
26,mlp.gate_proj,0.0004019208,0.05000,1.982
26,mlp.down_proj,0.0001047065,0.05000,3.641
27,self_attn.k_proj,0.0001113520,0.05000,4.056
27,self_attn.v_proj,0.0001370804,0.05000,4.102
27,self_attn.q_proj,0.0004825537,0.05000,4.124
27,self_attn.o_proj,0.0000248997,0.05000,1.391
27,mlp.gate_proj,0.0004625157,0.05000,2.020
27,mlp.up_proj,0.0004192917,0.05000,2.036
27,mlp.down_proj,0.0001594466,0.05000,3.677
28,self_attn.k_proj,0.0001383105,0.05000,4.090
28,self_attn.q_proj,0.0005395452,0.05000,4.121
28,self_attn.v_proj,0.0001479625,0.05000,4.168
28,self_attn.o_proj,0.0000397905,0.05000,1.428
28,mlp.up_proj,0.0005127904,0.05000,1.930
28,mlp.gate_proj,0.0005403935,0.05000,1.953
28,mlp.down_proj,0.0002484342,0.05000,3.655
29,self_attn.q_proj,0.0013496861,0.05000,3.944
29,self_attn.v_proj,0.0004323819,0.05000,4.024
29,self_attn.k_proj,0.0003228422,0.05000,4.047
29,self_attn.o_proj,0.0000484061,0.05000,1.422
29,mlp.gate_proj,0.0006483419,0.05000,1.925
29,mlp.up_proj,0.0006496695,0.05000,1.925
29,mlp.down_proj,0.0002600003,0.05000,3.624
30,self_attn.q_proj,0.0015428930,0.05000,3.853
30,self_attn.v_proj,0.0004768748,0.05000,3.943
30,self_attn.k_proj,0.0003945851,0.05000,3.970
30,self_attn.o_proj,0.0000822501,0.05000,1.387
30,mlp.gate_proj,0.0006952679,0.05000,1.906
30,mlp.up_proj,0.0007226467,0.05000,1.926
30,mlp.down_proj,0.0003748471,0.05000,3.639
31,self_attn.k_proj,0.0005569797,0.05000,4.087
31,self_attn.q_proj,0.0020152309,0.05000,4.146
31,self_attn.v_proj,0.0007386387,0.05000,4.160
31,self_attn.o_proj,0.0000730647,0.05000,1.423
31,mlp.gate_proj,0.0007217523,0.05000,2.030
31,mlp.up_proj,0.0007903871,0.05000,2.049
31,mlp.down_proj,0.0005056261,0.05000,3.588
32,self_attn.v_proj,0.0011295374,0.05000,3.986
32,self_attn.k_proj,0.0007691019,0.05000,4.011
32,self_attn.q_proj,0.0030725903,0.05000,4.044
32,self_attn.o_proj,0.0001355138,0.05000,1.374
32,mlp.up_proj,0.0008563334,0.05000,1.920
32,mlp.gate_proj,0.0007635706,0.05000,1.950
32,mlp.down_proj,0.0006399382,0.05000,3.592
33,self_attn.q_proj,0.0068798436,0.05000,3.950
33,self_attn.k_proj,0.0014373954,0.05000,4.008
33,self_attn.v_proj,0.0026777900,0.05000,4.049
33,self_attn.o_proj,0.0001293710,0.05000,1.427
33,mlp.gate_proj,0.0008459430,0.05000,1.951
33,mlp.up_proj,0.0009587818,0.05000,1.976
33,mlp.down_proj,0.0008440783,0.05000,3.667
34,self_attn.k_proj,0.0012213234,0.05000,4.055
34,self_attn.v_proj,0.0019338977,0.05000,4.098
34,self_attn.q_proj,0.0053902740,0.05000,4.110
34,self_attn.o_proj,0.0003464893,0.05000,1.511
34,mlp.up_proj,0.0010697745,0.05000,2.085
34,mlp.gate_proj,0.0010178187,0.05000,2.113
34,mlp.down_proj,0.0010601362,0.05000,3.797
35,self_attn.v_proj,0.0008049977,0.05000,4.341
35,self_attn.q_proj,0.0024685818,0.05000,4.384
35,self_attn.k_proj,0.0006466726,0.05000,4.404
35,self_attn.o_proj,0.0003548001,0.05000,1.465
35,mlp.up_proj,0.0016807885,0.05000,2.054
35,mlp.gate_proj,0.0016346245,0.05000,2.080
35,mlp.down_proj,0.0016113244,0.05000,3.722