File size: 11,667 Bytes
c3b903d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,samples,damp,time
0,self_attn.k_proj,0.0000000493,0.05000,5.263
0,self_attn.v_proj,0.0000000430,0.05000,5.230
0,self_attn.q_proj,0.0000001818,0.05000,5.398
0,self_attn.o_proj,0.0000001054,0.05000,1.579
0,mlp.gate_proj,0.0000114880,0.05000,2.158
0,mlp.up_proj,0.0000100735,0.05000,2.185
0,mlp.down_proj,0.0000015222,0.05000,4.023
1,self_attn.v_proj,0.0000000828,0.05000,5.549
1,self_attn.k_proj,0.0000000801,0.05000,5.598
1,self_attn.q_proj,0.0000002977,0.05000,5.632
1,self_attn.o_proj,0.0000001564,0.05000,1.642
1,mlp.up_proj,0.0001828177,0.05000,2.748
1,mlp.gate_proj,0.0004096040,0.05000,2.781
1,mlp.down_proj,0.0000016779,0.05000,4.167
2,self_attn.q_proj,0.0000008498,0.05000,5.458
2,self_attn.k_proj,0.0000002434,0.05000,5.462
2,self_attn.v_proj,0.0000002356,0.05000,5.488
2,self_attn.o_proj,0.0000002095,0.05000,1.587
2,mlp.gate_proj,0.0006597485,0.05000,2.158
2,mlp.up_proj,0.0005201637,0.05000,2.182
2,mlp.down_proj,0.0000012058,0.05000,4.134
3,self_attn.v_proj,0.0000004307,0.05000,4.747
3,self_attn.q_proj,0.0000016371,0.05000,4.795
3,self_attn.k_proj,0.0000004186,0.05000,4.822
3,self_attn.o_proj,0.0000003189,0.05000,1.582
3,mlp.gate_proj,0.0006873334,0.05000,2.253
3,mlp.up_proj,0.0003494455,0.05000,2.278
3,mlp.down_proj,0.0000034346,0.05000,4.083
4,self_attn.k_proj,0.0000008386,0.05000,4.757
4,self_attn.q_proj,0.0000030813,0.05000,4.826
4,self_attn.v_proj,0.0000008369,0.05000,4.856
4,self_attn.o_proj,0.0000004898,0.05000,1.581
4,mlp.gate_proj,0.0005583692,0.05000,2.182
4,mlp.up_proj,0.0002213096,0.05000,2.199
4,mlp.down_proj,0.0000050593,0.05000,4.103
5,self_attn.v_proj,0.0000009181,0.05000,4.694
5,self_attn.q_proj,0.0000033667,0.05000,4.788
5,self_attn.k_proj,0.0000008563,0.05000,4.810
5,self_attn.o_proj,0.0000009128,0.05000,1.617
5,mlp.up_proj,0.0000946976,0.05000,2.171
5,mlp.gate_proj,0.0001955191,0.05000,2.186
5,mlp.down_proj,0.0000073415,0.05000,4.025
6,self_attn.k_proj,0.0000017104,0.05000,4.671
6,self_attn.q_proj,0.0000072547,0.05000,4.673
6,self_attn.v_proj,0.0000019447,0.05000,4.740
6,self_attn.o_proj,0.0000021203,0.05000,1.586
6,mlp.up_proj,0.0001155930,0.05000,2.146
6,mlp.gate_proj,0.0002014494,0.05000,2.173
6,mlp.down_proj,0.0001064753,0.05000,4.027
7,self_attn.v_proj,0.0000040180,0.05000,4.646
7,self_attn.k_proj,0.0000039139,0.05000,4.743
7,self_attn.q_proj,0.0000144505,0.05000,4.788
7,self_attn.o_proj,0.0000026974,0.05000,1.584
7,mlp.gate_proj,0.0002344327,0.05000,2.212
7,mlp.up_proj,0.0001410635,0.05000,2.228
7,mlp.down_proj,0.0000122430,0.05000,4.045
8,self_attn.v_proj,0.0000061553,0.05000,4.724
8,self_attn.k_proj,0.0000056424,0.05000,4.805
8,self_attn.q_proj,0.0000217994,0.05000,4.822
8,self_attn.o_proj,0.0000037739,0.05000,1.553
8,mlp.up_proj,0.0001321165,0.05000,2.204
8,mlp.gate_proj,0.0001614960,0.05000,2.229
8,mlp.down_proj,0.0000182675,0.05000,4.046
9,self_attn.v_proj,0.0000074750,0.05000,4.669
9,self_attn.k_proj,0.0000076202,0.05000,4.776
9,self_attn.q_proj,0.0000272138,0.05000,4.797
9,self_attn.o_proj,0.0000046978,0.05000,1.548
9,mlp.gate_proj,0.0002562806,0.05000,2.187
9,mlp.up_proj,0.0001628826,0.05000,2.215
9,mlp.down_proj,0.0000175799,0.05000,4.044
10,self_attn.v_proj,0.0000116134,0.05000,4.708
10,self_attn.q_proj,0.0000392499,0.05000,4.796
10,self_attn.k_proj,0.0000101168,0.05000,4.825
10,self_attn.o_proj,0.0000065144,0.05000,1.564
10,mlp.gate_proj,0.0001958697,0.05000,2.119
10,mlp.up_proj,0.0001360473,0.05000,2.145
10,mlp.down_proj,0.0000145277,0.05000,4.086
11,self_attn.q_proj,0.0000192507,0.05000,4.803
11,self_attn.k_proj,0.0000052222,0.05000,4.898
11,self_attn.v_proj,0.0000053556,0.05000,4.912
11,self_attn.o_proj,0.0000041119,0.05000,1.587
11,mlp.up_proj,0.0001261680,0.05000,2.254
11,mlp.gate_proj,0.0001601069,0.05000,2.282
11,mlp.down_proj,0.0000135020,0.05000,4.082
12,self_attn.v_proj,0.0000064571,0.05000,4.739
12,self_attn.k_proj,0.0000060496,0.05000,4.798
12,self_attn.q_proj,0.0000229375,0.05000,4.825
12,self_attn.o_proj,0.0000045234,0.05000,1.565
12,mlp.gate_proj,0.0001424156,0.05000,2.162
12,mlp.up_proj,0.0001237598,0.05000,2.199
12,mlp.down_proj,0.0000132686,0.05000,4.109
13,self_attn.v_proj,0.0000043770,0.05000,4.753
13,self_attn.q_proj,0.0000172819,0.05000,4.763
13,self_attn.k_proj,0.0000045546,0.05000,4.833
13,self_attn.o_proj,0.0000035265,0.05000,1.567
13,mlp.up_proj,0.0001254522,0.05000,2.206
13,mlp.gate_proj,0.0001320071,0.05000,2.233
13,mlp.down_proj,0.0000141580,0.05000,4.072
14,self_attn.k_proj,0.0000074098,0.05000,4.615
14,self_attn.v_proj,0.0000077529,0.05000,4.679
14,self_attn.q_proj,0.0000286967,0.05000,4.749
14,self_attn.o_proj,0.0000061177,0.05000,1.550
14,mlp.gate_proj,0.0001299273,0.05000,2.266
14,mlp.up_proj,0.0001247252,0.05000,2.294
14,mlp.down_proj,0.0000125522,0.05000,4.003
15,self_attn.k_proj,0.0000074629,0.05000,4.647
15,self_attn.v_proj,0.0000070869,0.05000,4.706
15,self_attn.q_proj,0.0000287500,0.05000,4.728
15,self_attn.o_proj,0.0000053824,0.05000,1.569
15,mlp.up_proj,0.0001194172,0.05000,2.237
15,mlp.gate_proj,0.0001202382,0.05000,2.265
15,mlp.down_proj,0.0000124121,0.05000,4.006
16,self_attn.v_proj,0.0000138066,0.05000,4.700
16,self_attn.q_proj,0.0000491788,0.05000,4.754
16,self_attn.k_proj,0.0000121632,0.05000,4.776
16,self_attn.o_proj,0.0000056487,0.05000,1.528
16,mlp.gate_proj,0.0001317158,0.05000,2.152
16,mlp.up_proj,0.0001234581,0.05000,2.181
16,mlp.down_proj,0.0000731754,0.05000,3.985
17,self_attn.v_proj,0.0000102906,0.05000,4.538
17,self_attn.k_proj,0.0000092378,0.05000,4.595
17,self_attn.q_proj,0.0000415329,0.05000,4.611
17,self_attn.o_proj,0.0000056563,0.05000,1.586
17,mlp.gate_proj,0.0001159002,0.05000,2.097
17,mlp.up_proj,0.0001103001,0.05000,2.119
17,mlp.down_proj,0.0000116598,0.05000,3.941
18,self_attn.v_proj,0.0000126035,0.05000,4.529
18,self_attn.k_proj,0.0000112181,0.05000,4.624
18,self_attn.q_proj,0.0000459624,0.05000,4.645
18,self_attn.o_proj,0.0000067008,0.05000,1.517
18,mlp.up_proj,0.0001220867,0.05000,2.127
18,mlp.gate_proj,0.0001259595,0.05000,2.151
18,mlp.down_proj,0.0000148286,0.05000,3.990
19,self_attn.v_proj,0.0000220715,0.05000,4.552
19,self_attn.q_proj,0.0000918620,0.05000,4.646
19,self_attn.k_proj,0.0000215181,0.05000,4.661
19,self_attn.o_proj,0.0000093699,0.05000,1.525
19,mlp.up_proj,0.0001385218,0.05000,2.151
19,mlp.gate_proj,0.0001418180,0.05000,2.187
19,mlp.down_proj,0.0000200673,0.05000,4.031
20,self_attn.v_proj,0.0000205235,0.05000,4.611
20,self_attn.k_proj,0.0000189061,0.05000,4.666
20,self_attn.q_proj,0.0000867353,0.05000,4.707
20,self_attn.o_proj,0.0000099226,0.05000,1.529
20,mlp.gate_proj,0.0001534587,0.05000,2.104
20,mlp.up_proj,0.0001536075,0.05000,2.127
20,mlp.down_proj,0.0000229419,0.05000,4.057
21,self_attn.q_proj,0.0001135650,0.05000,4.631
21,self_attn.v_proj,0.0000283243,0.05000,4.673
21,self_attn.k_proj,0.0000256526,0.05000,4.704
21,self_attn.o_proj,0.0000121988,0.05000,1.516
21,mlp.gate_proj,0.0001884211,0.05000,2.185
21,mlp.up_proj,0.0001869941,0.05000,2.210
21,mlp.down_proj,0.0000308657,0.05000,4.063
22,self_attn.v_proj,0.0000583108,0.05000,4.585
22,self_attn.k_proj,0.0000517917,0.05000,4.706
22,self_attn.q_proj,0.0002313806,0.05000,4.729
22,self_attn.o_proj,0.0000236768,0.05000,1.536
22,mlp.gate_proj,0.0002260857,0.05000,2.162
22,mlp.up_proj,0.0002135566,0.05000,2.166
22,mlp.down_proj,0.0000494211,0.05000,4.036
23,self_attn.q_proj,0.0002298174,0.05000,4.589
23,self_attn.v_proj,0.0000604328,0.05000,4.642
23,self_attn.k_proj,0.0000521171,0.05000,4.669
23,self_attn.o_proj,0.0000286128,0.05000,1.542
23,mlp.up_proj,0.0002542144,0.05000,2.091
23,mlp.gate_proj,0.0002834950,0.05000,2.117
23,mlp.down_proj,0.0000714043,0.05000,3.947
24,self_attn.q_proj,0.0003790222,0.05000,4.545
24,self_attn.k_proj,0.0000869552,0.05000,4.611
24,self_attn.v_proj,0.0001125668,0.05000,4.636
24,self_attn.o_proj,0.0000321837,0.05000,1.519
24,mlp.gate_proj,0.0003256180,0.05000,2.167
24,mlp.up_proj,0.0002876031,0.05000,2.184
24,mlp.down_proj,0.0000910746,0.05000,4.001
25,self_attn.v_proj,0.0000745178,0.05000,4.557
25,self_attn.k_proj,0.0000651236,0.05000,4.615
25,self_attn.q_proj,0.0002660532,0.05000,4.618
25,self_attn.o_proj,0.0000205836,0.05000,1.570
25,mlp.gate_proj,0.0003814693,0.05000,2.173
25,mlp.up_proj,0.0003324611,0.05000,2.198
25,mlp.down_proj,0.0001092646,0.05000,3.999
26,self_attn.q_proj,0.0004336856,0.05000,4.503
26,self_attn.k_proj,0.0001006330,0.05000,4.610
26,self_attn.v_proj,0.0001189561,0.05000,4.639
26,self_attn.o_proj,0.0000175224,0.05000,1.559
26,mlp.gate_proj,0.0004622448,0.05000,2.077
26,mlp.up_proj,0.0004160738,0.05000,2.105
26,mlp.down_proj,0.0001339366,0.05000,4.006
27,self_attn.q_proj,0.0005402514,0.05000,4.494
27,self_attn.v_proj,0.0001606098,0.05000,4.572
27,self_attn.k_proj,0.0001219699,0.05000,4.587
27,self_attn.o_proj,0.0000265767,0.05000,1.516
27,mlp.up_proj,0.0004859503,0.05000,2.107
27,mlp.gate_proj,0.0005210892,0.05000,2.127
27,mlp.down_proj,0.0001976302,0.05000,3.981
28,self_attn.v_proj,0.0001727967,0.05000,4.584
28,self_attn.q_proj,0.0006006611,0.05000,4.654
28,self_attn.k_proj,0.0001497753,0.05000,4.672
28,self_attn.o_proj,0.0000468070,0.05000,1.525
28,mlp.gate_proj,0.0005994609,0.05000,2.076
28,mlp.up_proj,0.0005836146,0.05000,2.096
28,mlp.down_proj,0.0003001203,0.05000,4.036
29,self_attn.q_proj,0.0014331605,0.05000,4.599
29,self_attn.v_proj,0.0004406140,0.05000,4.658
29,self_attn.k_proj,0.0003258389,0.05000,4.691
29,self_attn.o_proj,0.0000410523,0.05000,1.552
29,mlp.up_proj,0.0007288250,0.05000,2.198
29,mlp.gate_proj,0.0007126805,0.05000,2.224
29,mlp.down_proj,0.0003305236,0.05000,4.009
30,self_attn.k_proj,0.0004136996,0.05000,4.614
30,self_attn.v_proj,0.0005229475,0.05000,4.680
30,self_attn.q_proj,0.0016422393,0.05000,4.692
30,self_attn.o_proj,0.0000856037,0.05000,1.541
30,mlp.up_proj,0.0007942698,0.05000,2.189
30,mlp.gate_proj,0.0007508333,0.05000,2.222
30,mlp.down_proj,0.0004645914,0.05000,4.039
31,self_attn.k_proj,0.0005456899,0.05000,4.581
31,self_attn.q_proj,0.0020693638,0.05000,4.670
31,self_attn.v_proj,0.0007509262,0.05000,4.698
31,self_attn.o_proj,0.0000844962,0.05000,1.550
31,mlp.gate_proj,0.0007564633,0.05000,2.158
31,mlp.up_proj,0.0008417759,0.05000,2.180
31,mlp.down_proj,0.0005862378,0.05000,4.025
32,self_attn.k_proj,0.0007358906,0.05000,4.489
32,self_attn.v_proj,0.0011125570,0.05000,4.616
32,self_attn.q_proj,0.0030247277,0.05000,4.656
32,self_attn.o_proj,0.0001102863,0.05000,1.526
32,mlp.gate_proj,0.0007851195,0.05000,2.050
32,mlp.up_proj,0.0008939442,0.05000,2.082
32,mlp.down_proj,0.0007211865,0.05000,4.016
33,self_attn.k_proj,0.0012740580,0.05000,4.581
33,self_attn.q_proj,0.0062236731,0.05000,4.623
33,self_attn.v_proj,0.0023895111,0.05000,4.652
33,self_attn.o_proj,0.0001302601,0.05000,1.533
33,mlp.gate_proj,0.0008421095,0.05000,2.077
33,mlp.up_proj,0.0009716948,0.05000,2.091
33,mlp.down_proj,0.0009164421,0.05000,4.036
34,self_attn.k_proj,0.0010975073,0.05000,4.594
34,self_attn.v_proj,0.0018084718,0.05000,4.686
34,self_attn.q_proj,0.0049361077,0.05000,4.708
34,self_attn.o_proj,0.0002369204,0.05000,1.572
34,mlp.up_proj,0.0010833157,0.05000,2.200
34,mlp.gate_proj,0.0010048764,0.05000,2.219
34,mlp.down_proj,0.0010852498,0.05000,4.017
35,self_attn.k_proj,0.0006271339,0.05000,4.627
35,self_attn.q_proj,0.0024465988,0.05000,4.708
35,self_attn.v_proj,0.0008356234,0.05000,4.728
35,self_attn.o_proj,0.0003305586,0.05000,1.565
35,mlp.gate_proj,0.0016273851,0.05000,2.216
35,mlp.up_proj,0.0016999776,0.05000,2.243
35,mlp.down_proj,0.0020161150,0.05000,4.022