File size: 11,667 Bytes
bc97a36
2511587
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0000004658,0.05000,5.010
0,self_attn.v_proj,0.0000001105,0.05000,5.075
0,self_attn.k_proj,0.0000001265,0.05000,5.110
0,self_attn.o_proj,0.0000002945,0.05000,1.782
0,mlp.gate_proj,0.0000289746,0.05000,2.652
0,mlp.up_proj,0.0000252975,0.05000,2.701
0,mlp.down_proj,0.0000037377,0.05000,4.487
1,self_attn.v_proj,0.0000002128,0.05000,5.102
1,self_attn.k_proj,0.0000002043,0.05000,5.172
1,self_attn.q_proj,0.0000007588,0.05000,5.212
1,self_attn.o_proj,0.0000003918,0.05000,1.697
1,mlp.up_proj,0.0004287361,0.05000,2.695
1,mlp.gate_proj,0.0009483043,0.05000,2.728
1,mlp.down_proj,0.0000043074,0.05000,4.332
2,self_attn.q_proj,0.0000021343,0.05000,5.001
2,self_attn.v_proj,0.0000005962,0.05000,5.076
2,self_attn.k_proj,0.0000006101,0.05000,5.116
2,self_attn.o_proj,0.0000005367,0.05000,1.751
2,mlp.gate_proj,0.0015602610,0.05000,2.559
2,mlp.up_proj,0.0012285131,0.05000,2.596
2,mlp.down_proj,0.0000029970,0.05000,4.283
3,self_attn.q_proj,0.0000041543,0.05000,5.029
3,self_attn.v_proj,0.0000011004,0.05000,5.103
3,self_attn.k_proj,0.0000010634,0.05000,5.118
3,self_attn.o_proj,0.0000007820,0.05000,1.644
3,mlp.up_proj,0.0008446519,0.05000,2.626
3,mlp.gate_proj,0.0016393722,0.05000,2.648
3,mlp.down_proj,0.0000086119,0.05000,4.336
4,self_attn.k_proj,0.0000021310,0.05000,5.020
4,self_attn.q_proj,0.0000078954,0.05000,5.044
4,self_attn.v_proj,0.0000021595,0.05000,5.111
4,self_attn.o_proj,0.0000012900,0.05000,1.737
4,mlp.up_proj,0.0005500840,0.05000,2.736
4,mlp.gate_proj,0.0013762149,0.05000,2.738
4,mlp.down_proj,0.0000128145,0.05000,4.359
5,self_attn.q_proj,0.0000086130,0.05000,4.662
5,self_attn.v_proj,0.0000023674,0.05000,4.759
5,self_attn.k_proj,0.0000021791,0.05000,4.780
5,self_attn.o_proj,0.0000023793,0.05000,1.532
5,mlp.up_proj,0.0002447622,0.05000,2.342
5,mlp.gate_proj,0.0005030251,0.05000,2.389
5,mlp.down_proj,0.0000185442,0.05000,3.948
6,self_attn.k_proj,0.0000044010,0.05000,4.673
6,self_attn.v_proj,0.0000050325,0.05000,4.750
6,self_attn.q_proj,0.0000186530,0.05000,4.770
6,self_attn.o_proj,0.0000058681,0.05000,1.525
6,mlp.gate_proj,0.0005370448,0.05000,2.268
6,mlp.up_proj,0.0003059179,0.05000,2.269
6,mlp.down_proj,0.0009229853,0.05000,4.023
7,self_attn.q_proj,0.0000376636,0.05000,4.710
7,self_attn.k_proj,0.0000101153,0.05000,4.744
7,self_attn.v_proj,0.0000104857,0.05000,4.764
7,self_attn.o_proj,0.0000067601,0.05000,1.529
7,mlp.up_proj,0.0003681960,0.05000,2.167
7,mlp.gate_proj,0.0006117455,0.05000,2.191
7,mlp.down_proj,0.0000305151,0.05000,3.961
8,self_attn.k_proj,0.0000149234,0.05000,4.718
8,self_attn.v_proj,0.0000164171,0.05000,4.838
8,self_attn.q_proj,0.0000576840,0.05000,4.853
8,self_attn.o_proj,0.0000096945,0.05000,1.528
8,mlp.gate_proj,0.0004354699,0.05000,2.192
8,mlp.up_proj,0.0003553537,0.05000,2.220
8,mlp.down_proj,0.0000482470,0.05000,3.933
9,self_attn.q_proj,0.0000708480,0.05000,4.591
9,self_attn.v_proj,0.0000198840,0.05000,4.660
9,self_attn.k_proj,0.0000197770,0.05000,4.679
9,self_attn.o_proj,0.0000112224,0.05000,1.536
9,mlp.up_proj,0.0004471922,0.05000,2.262
9,mlp.gate_proj,0.0007011541,0.05000,2.280
9,mlp.down_proj,0.0000475860,0.05000,4.021
10,self_attn.v_proj,0.0000316658,0.05000,4.767
10,self_attn.k_proj,0.0000271843,0.05000,4.818
10,self_attn.q_proj,0.0001055211,0.05000,4.854
10,self_attn.o_proj,0.0000176322,0.05000,1.520
10,mlp.up_proj,0.0003787521,0.05000,2.269
10,mlp.gate_proj,0.0005428473,0.05000,2.288
10,mlp.down_proj,0.0000400349,0.05000,4.012
11,self_attn.v_proj,0.0000144231,0.05000,4.735
11,self_attn.k_proj,0.0000135963,0.05000,4.801
11,self_attn.q_proj,0.0000505602,0.05000,4.845
11,self_attn.o_proj,0.0000114296,0.05000,1.576
11,mlp.up_proj,0.0003493419,0.05000,2.223
11,mlp.gate_proj,0.0004413690,0.05000,2.253
11,mlp.down_proj,0.0000366131,0.05000,4.017
12,self_attn.k_proj,0.0000153139,0.05000,4.535
12,self_attn.v_proj,0.0000167666,0.05000,4.586
12,self_attn.q_proj,0.0000584962,0.05000,4.616
12,self_attn.o_proj,0.0000125013,0.05000,1.622
12,mlp.gate_proj,0.0003892739,0.05000,2.209
12,mlp.up_proj,0.0003392469,0.05000,2.254
12,mlp.down_proj,0.0000370598,0.05000,4.058
13,self_attn.k_proj,0.0000116586,0.05000,4.696
13,self_attn.q_proj,0.0000443353,0.05000,4.778
13,self_attn.v_proj,0.0000114623,0.05000,4.786
13,self_attn.o_proj,0.0000097119,0.05000,1.567
13,mlp.gate_proj,0.0003625256,0.05000,2.177
13,mlp.up_proj,0.0003459045,0.05000,2.200
13,mlp.down_proj,0.0000401894,0.05000,3.992
14,self_attn.k_proj,0.0000190907,0.05000,4.754
14,self_attn.v_proj,0.0000204954,0.05000,4.793
14,self_attn.q_proj,0.0000746915,0.05000,4.818
14,self_attn.o_proj,0.0000171502,0.05000,1.543
14,mlp.up_proj,0.0003421074,0.05000,2.281
14,mlp.gate_proj,0.0003563725,0.05000,2.298
14,mlp.down_proj,0.0000358082,0.05000,4.040
15,self_attn.v_proj,0.0000185207,0.05000,4.646
15,self_attn.k_proj,0.0000190547,0.05000,4.731
15,self_attn.q_proj,0.0000740901,0.05000,4.774
15,self_attn.o_proj,0.0000141826,0.05000,1.478
15,mlp.gate_proj,0.0003278327,0.05000,2.248
15,mlp.up_proj,0.0003265059,0.05000,2.267
15,mlp.down_proj,0.0000344754,0.05000,4.042
16,self_attn.k_proj,0.0000315280,0.05000,4.783
16,self_attn.v_proj,0.0000365092,0.05000,4.807
16,self_attn.q_proj,0.0001286270,0.05000,4.856
16,self_attn.o_proj,0.0000152998,0.05000,1.529
16,mlp.up_proj,0.0003464136,0.05000,2.168
16,mlp.gate_proj,0.0003686556,0.05000,2.192
16,mlp.down_proj,0.0003761285,0.05000,4.012
17,self_attn.v_proj,0.0000274548,0.05000,4.671
17,self_attn.q_proj,0.0001093356,0.05000,4.719
17,self_attn.k_proj,0.0000240166,0.05000,4.734
17,self_attn.o_proj,0.0000160825,0.05000,1.532
17,mlp.up_proj,0.0003082692,0.05000,2.155
17,mlp.gate_proj,0.0003231052,0.05000,2.188
17,mlp.down_proj,0.0000337791,0.05000,4.057
18,self_attn.v_proj,0.0000330313,0.05000,4.643
18,self_attn.q_proj,0.0001190752,0.05000,4.730
18,self_attn.k_proj,0.0000287658,0.05000,4.754
18,self_attn.o_proj,0.0000195600,0.05000,1.516
18,mlp.up_proj,0.0003365247,0.05000,2.106
18,mlp.gate_proj,0.0003462859,0.05000,2.140
18,mlp.down_proj,0.0000416005,0.05000,3.978
19,self_attn.q_proj,0.0002362754,0.05000,4.728
19,self_attn.v_proj,0.0000574272,0.05000,4.794
19,self_attn.k_proj,0.0000544455,0.05000,4.836
19,self_attn.o_proj,0.0000249074,0.05000,1.527
19,mlp.up_proj,0.0003698838,0.05000,2.291
19,mlp.gate_proj,0.0003773468,0.05000,2.316
19,mlp.down_proj,0.0000530812,0.05000,3.979
20,self_attn.q_proj,0.0002188809,0.05000,4.637
20,self_attn.k_proj,0.0000471045,0.05000,4.642
20,self_attn.v_proj,0.0000521005,0.05000,4.717
20,self_attn.o_proj,0.0000248449,0.05000,1.551
20,mlp.up_proj,0.0004029941,0.05000,2.137
20,mlp.gate_proj,0.0004013012,0.05000,2.158
20,mlp.down_proj,0.0000585520,0.05000,4.006
21,self_attn.v_proj,0.0000706983,0.05000,4.767
21,self_attn.k_proj,0.0000634547,0.05000,4.821
21,self_attn.q_proj,0.0002828724,0.05000,4.833
21,self_attn.o_proj,0.0000280543,0.05000,1.523
21,mlp.up_proj,0.0004820233,0.05000,2.173
21,mlp.gate_proj,0.0004849778,0.05000,2.193
21,mlp.down_proj,0.0000768389,0.05000,3.981
22,self_attn.v_proj,0.0001434762,0.05000,4.732
22,self_attn.k_proj,0.0001253538,0.05000,4.819
22,self_attn.q_proj,0.0005631076,0.05000,4.846
22,self_attn.o_proj,0.0000553381,0.05000,1.557
22,mlp.up_proj,0.0005509696,0.05000,2.289
22,mlp.gate_proj,0.0005793622,0.05000,2.321
22,mlp.down_proj,0.0001229682,0.05000,4.013
23,self_attn.k_proj,0.0001267572,0.05000,4.744
23,self_attn.q_proj,0.0005634705,0.05000,4.804
23,self_attn.v_proj,0.0001490576,0.05000,4.816
23,self_attn.o_proj,0.0000699550,0.05000,1.576
23,mlp.up_proj,0.0006518718,0.05000,2.161
23,mlp.gate_proj,0.0007231190,0.05000,2.181
23,mlp.down_proj,0.0001715308,0.05000,4.017
24,self_attn.k_proj,0.0002084854,0.05000,4.662
24,self_attn.q_proj,0.0009156117,0.05000,4.669
24,self_attn.v_proj,0.0002740377,0.05000,4.713
24,self_attn.o_proj,0.0000720820,0.05000,1.559
24,mlp.up_proj,0.0007225850,0.05000,2.197
24,mlp.gate_proj,0.0008156972,0.05000,2.225
24,mlp.down_proj,0.0002084210,0.05000,3.929
25,self_attn.k_proj,0.0001581930,0.05000,4.615
25,self_attn.v_proj,0.0001814060,0.05000,4.679
25,self_attn.q_proj,0.0006456851,0.05000,4.706
25,self_attn.o_proj,0.0000473329,0.05000,1.560
25,mlp.up_proj,0.0008242684,0.05000,2.142
25,mlp.gate_proj,0.0009428286,0.05000,2.172
25,mlp.down_proj,0.0002486016,0.05000,3.990
26,self_attn.k_proj,0.0002404927,0.05000,4.621
26,self_attn.v_proj,0.0002888501,0.05000,4.642
26,self_attn.q_proj,0.0010402773,0.05000,4.699
26,self_attn.o_proj,0.0000435772,0.05000,1.576
26,mlp.gate_proj,0.0011219122,0.05000,2.293
26,mlp.up_proj,0.0010119856,0.05000,2.308
26,mlp.down_proj,0.0002938876,0.05000,3.967
27,self_attn.q_proj,0.0012800641,0.05000,4.697
27,self_attn.v_proj,0.0003850639,0.05000,4.761
27,self_attn.k_proj,0.0002874724,0.05000,4.777
27,self_attn.o_proj,0.0000617831,0.05000,1.501
27,mlp.gate_proj,0.0012472775,0.05000,2.290
27,mlp.up_proj,0.0011642600,0.05000,2.332
27,mlp.down_proj,0.0004163454,0.05000,3.981
28,self_attn.k_proj,0.0003459948,0.05000,4.645
28,self_attn.q_proj,0.0013993718,0.05000,4.743
28,self_attn.v_proj,0.0004060790,0.05000,4.769
28,self_attn.o_proj,0.0000979789,0.05000,1.566
28,mlp.up_proj,0.0013726200,0.05000,2.287
28,mlp.gate_proj,0.0014115914,0.05000,2.297
28,mlp.down_proj,0.0006263638,0.05000,4.001
29,self_attn.k_proj,0.0007600260,0.05000,4.717
29,self_attn.v_proj,0.0010328331,0.05000,4.782
29,self_attn.q_proj,0.0033356990,0.05000,4.817
29,self_attn.o_proj,0.0000782954,0.05000,1.573
29,mlp.up_proj,0.0016839991,0.05000,2.209
29,mlp.gate_proj,0.0016476954,0.05000,2.240
29,mlp.down_proj,0.0006718912,0.05000,4.071
30,self_attn.k_proj,0.0009500644,0.05000,4.648
30,self_attn.q_proj,0.0037828581,0.05000,4.691
30,self_attn.v_proj,0.0012155685,0.05000,4.745
30,self_attn.o_proj,0.0001820555,0.05000,1.580
30,mlp.gate_proj,0.0017180550,0.05000,2.131
30,mlp.up_proj,0.0018161121,0.05000,2.136
30,mlp.down_proj,0.0009175093,0.05000,4.057
31,self_attn.q_proj,0.0047508813,0.05000,4.746
31,self_attn.k_proj,0.0012552110,0.05000,4.801
31,self_attn.v_proj,0.0017514399,0.05000,4.843
31,self_attn.o_proj,0.0001683568,0.05000,1.558
31,mlp.gate_proj,0.0016933631,0.05000,2.218
31,mlp.up_proj,0.0018819273,0.05000,2.249
31,mlp.down_proj,0.0011198272,0.05000,4.011
32,self_attn.q_proj,0.0068592699,0.05000,4.781
32,self_attn.v_proj,0.0025475807,0.05000,4.837
32,self_attn.k_proj,0.0016688757,0.05000,4.866
32,self_attn.o_proj,0.0001884213,0.05000,1.525
32,mlp.up_proj,0.0019701249,0.05000,2.197
32,mlp.gate_proj,0.0017300433,0.05000,2.227
32,mlp.down_proj,0.0013363534,0.05000,4.113
33,self_attn.q_proj,0.0142963547,0.05000,4.643
33,self_attn.k_proj,0.0029161619,0.05000,4.714
33,self_attn.v_proj,0.0055924580,0.05000,4.730
33,self_attn.o_proj,0.0002554161,0.05000,1.524
33,mlp.up_proj,0.0021203640,0.05000,2.182
33,mlp.gate_proj,0.0018360139,0.05000,2.221
33,mlp.down_proj,0.0017152019,0.05000,4.001
34,self_attn.q_proj,0.0111581404,0.05000,4.838
34,self_attn.v_proj,0.0041854484,0.05000,4.842
34,self_attn.k_proj,0.0024777448,0.05000,4.885
34,self_attn.o_proj,0.0005260598,0.05000,1.526
34,mlp.gate_proj,0.0021799901,0.05000,2.246
34,mlp.up_proj,0.0023547660,0.05000,2.265
34,mlp.down_proj,0.0020657629,0.05000,4.042
35,self_attn.q_proj,0.0052947048,0.05000,4.801
35,self_attn.k_proj,0.0013647166,0.05000,4.871
35,self_attn.v_proj,0.0018166249,0.05000,4.910
35,self_attn.o_proj,0.0007476909,0.05000,1.543
35,mlp.up_proj,0.0045160744,0.05000,2.199
35,mlp.gate_proj,0.0039883456,0.05000,2.221
35,mlp.down_proj,0.0036397912,0.05000,4.071