File size: 11,667 Bytes
2d696fe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,damp,time
0,self_attn.k_proj,0.0000005446,0.05000,4.435
0,self_attn.v_proj,0.0000004809,0.05000,4.490
0,self_attn.q_proj,0.0000019953,0.05000,4.522
0,self_attn.o_proj,0.0000013446,0.05000,1.474
0,mlp.gate_proj,0.0001399555,0.05000,2.281
0,mlp.up_proj,0.0001233887,0.05000,2.308
0,mlp.down_proj,0.0000115761,0.05000,3.801
1,self_attn.k_proj,0.0000008938,0.05000,4.646
1,self_attn.v_proj,0.0000009459,0.05000,4.716
1,self_attn.q_proj,0.0000033180,0.05000,4.747
1,self_attn.o_proj,0.0000019172,0.05000,1.496
1,mlp.gate_proj,0.0041653160,0.05000,2.307
1,mlp.up_proj,0.0018882145,0.05000,2.328
1,mlp.down_proj,0.0000226382,0.05000,3.953
2,self_attn.q_proj,0.0000094249,0.05000,4.620
2,self_attn.v_proj,0.0000027178,0.05000,4.675
2,self_attn.k_proj,0.0000026439,0.05000,4.706
2,self_attn.o_proj,0.0000024441,0.05000,1.528
2,mlp.up_proj,0.0056557384,0.05000,2.318
2,mlp.gate_proj,0.0073925028,0.05000,2.362
2,mlp.down_proj,0.0000110274,0.05000,3.862
3,self_attn.v_proj,0.0000051766,0.05000,4.551
3,self_attn.k_proj,0.0000047641,0.05000,4.605
3,self_attn.q_proj,0.0000187628,0.05000,4.655
3,self_attn.o_proj,0.0000038860,0.05000,1.532
3,mlp.gate_proj,0.0075710094,0.05000,2.311
3,mlp.up_proj,0.0039994645,0.05000,2.335
3,mlp.down_proj,0.0000351612,0.05000,3.938
4,self_attn.v_proj,0.0000105205,0.05000,4.585
4,self_attn.q_proj,0.0000371834,0.05000,4.616
4,self_attn.k_proj,0.0000098234,0.05000,4.657
4,self_attn.o_proj,0.0000067732,0.05000,1.475
4,mlp.gate_proj,0.0071237201,0.05000,2.310
4,mlp.up_proj,0.0029747248,0.05000,2.344
4,mlp.down_proj,0.0000574324,0.05000,3.899
5,self_attn.q_proj,0.0000413305,0.05000,4.582
5,self_attn.k_proj,0.0000102927,0.05000,4.682
5,self_attn.v_proj,0.0000116420,0.05000,4.687
5,self_attn.o_proj,0.0000104877,0.05000,1.517
5,mlp.gate_proj,0.0025908125,0.05000,2.189
5,mlp.up_proj,0.0013660692,0.05000,2.231
5,mlp.down_proj,0.0000756003,0.05000,3.986
6,self_attn.v_proj,0.0000247758,0.05000,4.602
6,self_attn.k_proj,0.0000211257,0.05000,4.647
6,self_attn.q_proj,0.0000905059,0.05000,4.674
6,self_attn.o_proj,0.0000300698,0.05000,1.613
6,mlp.gate_proj,0.0030917147,0.05000,2.328
6,mlp.up_proj,0.0018886445,0.05000,2.331
6,mlp.down_proj,0.0332592290,0.05000,3.771
7,self_attn.k_proj,0.0000501618,0.05000,4.714
7,self_attn.v_proj,0.0000571124,0.05000,4.742
7,self_attn.q_proj,0.0001914269,0.05000,4.786
7,self_attn.o_proj,0.0000288241,0.05000,1.529
7,mlp.gate_proj,0.0034643194,0.05000,2.267
7,mlp.up_proj,0.0022614451,0.05000,2.286
7,mlp.down_proj,0.0001239395,0.05000,3.985
8,self_attn.v_proj,0.0000967413,0.05000,4.581
8,self_attn.q_proj,0.0003134763,0.05000,4.654
8,self_attn.k_proj,0.0000789401,0.05000,4.675
8,self_attn.o_proj,0.0000430230,0.05000,1.496
8,mlp.gate_proj,0.0026727254,0.05000,2.208
8,mlp.up_proj,0.0022354008,0.05000,2.211
8,mlp.down_proj,0.0002008462,0.05000,3.878
9,self_attn.v_proj,0.0001148402,0.05000,4.588
9,self_attn.q_proj,0.0003638978,0.05000,4.640
9,self_attn.k_proj,0.0000975615,0.05000,4.660
9,self_attn.o_proj,0.0000505605,0.05000,1.508
9,mlp.up_proj,0.0028585258,0.05000,2.225
9,mlp.gate_proj,0.0043162701,0.05000,2.256
9,mlp.down_proj,0.0002047321,0.05000,3.899
10,self_attn.k_proj,0.0001473746,0.05000,4.548
10,self_attn.q_proj,0.0005854961,0.05000,4.614
10,self_attn.v_proj,0.0001894117,0.05000,4.653
10,self_attn.o_proj,0.0000811983,0.05000,1.509
10,mlp.gate_proj,0.0033544317,0.05000,2.200
10,mlp.up_proj,0.0023989295,0.05000,2.222
10,mlp.down_proj,0.0001772248,0.05000,4.003
11,self_attn.v_proj,0.0000827167,0.05000,4.690
11,self_attn.q_proj,0.0002662091,0.05000,4.735
11,self_attn.k_proj,0.0000691783,0.05000,4.782
11,self_attn.o_proj,0.0000499789,0.05000,1.552
11,mlp.gate_proj,0.0026805456,0.05000,2.212
11,mlp.up_proj,0.0021580690,0.05000,2.233
11,mlp.down_proj,0.0001650185,0.05000,3.973
12,self_attn.q_proj,0.0003080287,0.05000,4.437
12,self_attn.v_proj,0.0000938578,0.05000,4.512
12,self_attn.k_proj,0.0000781946,0.05000,4.542
12,self_attn.o_proj,0.0000578502,0.05000,1.546
12,mlp.gate_proj,0.0023073073,0.05000,2.179
12,mlp.up_proj,0.0020454499,0.05000,2.207
12,mlp.down_proj,0.0001695974,0.05000,3.923
13,self_attn.q_proj,0.0002273831,0.05000,4.594
13,self_attn.v_proj,0.0000638472,0.05000,4.635
13,self_attn.k_proj,0.0000578739,0.05000,4.654
13,self_attn.o_proj,0.0000429728,0.05000,1.494
13,mlp.up_proj,0.0020749440,0.05000,2.240
13,mlp.gate_proj,0.0021318999,0.05000,2.263
13,mlp.down_proj,0.0001738223,0.05000,3.795
14,self_attn.q_proj,0.0004071593,0.05000,4.546
14,self_attn.v_proj,0.0001184261,0.05000,4.615
14,self_attn.k_proj,0.0001007172,0.05000,4.637
14,self_attn.o_proj,0.0000832822,0.05000,1.471
14,mlp.gate_proj,0.0021145169,0.05000,2.254
14,mlp.up_proj,0.0020794049,0.05000,2.288
14,mlp.down_proj,0.0001682187,0.05000,3.931
15,self_attn.q_proj,0.0003949634,0.05000,4.506
15,self_attn.k_proj,0.0000972706,0.05000,4.567
15,self_attn.v_proj,0.0001075819,0.05000,4.595
15,self_attn.o_proj,0.0000702698,0.05000,1.511
15,mlp.up_proj,0.0019951703,0.05000,2.234
15,mlp.gate_proj,0.0019534257,0.05000,2.254
15,mlp.down_proj,0.0001479562,0.05000,3.895
16,self_attn.k_proj,0.0001644773,0.05000,4.625
16,self_attn.v_proj,0.0002094128,0.05000,4.660
16,self_attn.q_proj,0.0006918110,0.05000,4.681
16,self_attn.o_proj,0.0000710968,0.05000,1.467
16,mlp.up_proj,0.0021764796,0.05000,2.290
16,mlp.gate_proj,0.0022640927,0.05000,2.310
16,mlp.down_proj,0.0033198516,0.05000,3.882
17,self_attn.k_proj,0.0001204673,0.05000,4.521
17,self_attn.q_proj,0.0005616263,0.05000,4.611
17,self_attn.v_proj,0.0001526302,0.05000,4.643
17,self_attn.o_proj,0.0000714830,0.05000,1.504
17,mlp.gate_proj,0.0018528427,0.05000,2.291
17,mlp.up_proj,0.0018205950,0.05000,2.312
17,mlp.down_proj,0.0001379722,0.05000,3.952
18,self_attn.q_proj,0.0006088453,0.05000,4.630
18,self_attn.v_proj,0.0001791300,0.05000,4.672
18,self_attn.k_proj,0.0001419126,0.05000,4.703
18,self_attn.o_proj,0.0000788589,0.05000,1.453
18,mlp.up_proj,0.0019597744,0.05000,2.320
18,mlp.gate_proj,0.0019572557,0.05000,2.347
18,mlp.down_proj,0.0001711005,0.05000,3.881
19,self_attn.k_proj,0.0002610412,0.05000,4.595
19,self_attn.v_proj,0.0003096798,0.05000,4.659
19,self_attn.q_proj,0.0011777417,0.05000,4.678
19,self_attn.o_proj,0.0001044770,0.05000,1.502
19,mlp.gate_proj,0.0020530066,0.05000,2.263
19,mlp.up_proj,0.0020742333,0.05000,2.289
19,mlp.down_proj,0.0001970563,0.05000,3.836
20,self_attn.q_proj,0.0010637725,0.05000,4.590
20,self_attn.v_proj,0.0002682226,0.05000,4.641
20,self_attn.k_proj,0.0002198448,0.05000,4.642
20,self_attn.o_proj,0.0000939247,0.05000,1.528
20,mlp.up_proj,0.0022529307,0.05000,2.304
20,mlp.gate_proj,0.0021745552,0.05000,2.330
20,mlp.down_proj,0.0002165000,0.05000,3.838
21,self_attn.q_proj,0.0013895572,0.05000,4.544
21,self_attn.v_proj,0.0003581413,0.05000,4.568
21,self_attn.k_proj,0.0002983564,0.05000,4.609
21,self_attn.o_proj,0.0001169767,0.05000,1.541
21,mlp.gate_proj,0.0025367997,0.05000,2.320
21,mlp.up_proj,0.0026352093,0.05000,2.351
21,mlp.down_proj,0.0002726330,0.05000,3.887
22,self_attn.k_proj,0.0005701520,0.05000,4.537
22,self_attn.q_proj,0.0026601848,0.05000,4.591
22,self_attn.v_proj,0.0007194130,0.05000,4.608
22,self_attn.o_proj,0.0002020238,0.05000,1.531
22,mlp.gate_proj,0.0028571501,0.05000,2.316
22,mlp.up_proj,0.0028531155,0.05000,2.345
22,mlp.down_proj,0.0004290918,0.05000,3.869
23,self_attn.v_proj,0.0006810734,0.05000,4.633
23,self_attn.q_proj,0.0026127356,0.05000,4.709
23,self_attn.k_proj,0.0005518920,0.05000,4.724
23,self_attn.o_proj,0.0002539472,0.05000,1.481
23,mlp.up_proj,0.0033200211,0.05000,2.287
23,mlp.gate_proj,0.0035143472,0.05000,2.324
23,mlp.down_proj,0.0006081266,0.05000,3.900
24,self_attn.q_proj,0.0042166189,0.05000,4.470
24,self_attn.k_proj,0.0009021544,0.05000,4.528
24,self_attn.v_proj,0.0012101352,0.05000,4.542
24,self_attn.o_proj,0.0002619612,0.05000,1.513
24,mlp.gate_proj,0.0039473643,0.05000,2.217
24,mlp.up_proj,0.0036371494,0.05000,2.262
24,mlp.down_proj,0.0007358780,0.05000,3.948
25,self_attn.v_proj,0.0008588059,0.05000,4.613
25,self_attn.k_proj,0.0007072059,0.05000,4.680
25,self_attn.q_proj,0.0030050377,0.05000,4.714
25,self_attn.o_proj,0.0001392516,0.05000,1.505
25,mlp.up_proj,0.0041476647,0.05000,2.311
25,mlp.gate_proj,0.0045728512,0.05000,2.333
25,mlp.down_proj,0.0008995740,0.05000,3.832
26,self_attn.v_proj,0.0013400384,0.05000,4.516
26,self_attn.q_proj,0.0048303925,0.05000,4.567
26,self_attn.k_proj,0.0010677429,0.05000,4.585
26,self_attn.o_proj,0.0001389864,0.05000,1.591
26,mlp.up_proj,0.0051213020,0.05000,2.350
26,mlp.gate_proj,0.0054934720,0.05000,2.364
26,mlp.down_proj,0.0010003413,0.05000,3.923
27,self_attn.q_proj,0.0059571299,0.05000,4.440
27,self_attn.k_proj,0.0012676684,0.05000,4.500
27,self_attn.v_proj,0.0017379407,0.05000,4.525
27,self_attn.o_proj,0.0001889476,0.05000,1.517
27,mlp.up_proj,0.0058125995,0.05000,2.315
27,mlp.gate_proj,0.0060353899,0.05000,2.331
27,mlp.down_proj,0.0013123413,0.05000,3.845
28,self_attn.k_proj,0.0014861768,0.05000,4.549
28,self_attn.v_proj,0.0018484022,0.05000,4.578
28,self_attn.q_proj,0.0063890707,0.05000,4.628
28,self_attn.o_proj,0.0003140973,0.05000,1.510
28,mlp.gate_proj,0.0067501478,0.05000,2.178
28,mlp.up_proj,0.0067735156,0.05000,2.204
28,mlp.down_proj,0.0018620787,0.05000,3.949
29,self_attn.v_proj,0.0043428479,0.05000,4.569
29,self_attn.k_proj,0.0031365861,0.05000,4.624
29,self_attn.q_proj,0.0145508105,0.05000,4.633
29,self_attn.o_proj,0.0002052081,0.05000,1.491
29,mlp.up_proj,0.0082491106,0.05000,2.178
29,mlp.gate_proj,0.0078799528,0.05000,2.198
29,mlp.down_proj,0.0022954082,0.05000,4.057
30,self_attn.k_proj,0.0040841100,0.05000,4.601
30,self_attn.v_proj,0.0054587066,0.05000,4.635
30,self_attn.q_proj,0.0169272007,0.05000,4.642
30,self_attn.o_proj,0.0005446325,0.05000,1.495
30,mlp.gate_proj,0.0084495943,0.05000,2.342
30,mlp.up_proj,0.0091387503,0.05000,2.392
30,mlp.down_proj,0.0031828321,0.05000,4.041
31,self_attn.k_proj,0.0053448655,0.05000,4.500
31,self_attn.v_proj,0.0078707677,0.05000,4.542
31,self_attn.q_proj,0.0216492466,0.05000,4.579
31,self_attn.o_proj,0.0005467853,0.05000,1.504
31,mlp.gate_proj,0.0085940339,0.05000,2.143
31,mlp.up_proj,0.0097583888,0.05000,2.184
31,mlp.down_proj,0.0042225099,0.05000,3.889
32,self_attn.q_proj,0.0314087043,0.05000,4.484
32,self_attn.k_proj,0.0073322198,0.05000,4.576
32,self_attn.v_proj,0.0116100621,0.05000,4.598
32,self_attn.o_proj,0.0007367414,0.05000,1.519
32,mlp.up_proj,0.0105576177,0.05000,2.267
32,mlp.gate_proj,0.0090539314,0.05000,2.292
32,mlp.down_proj,0.0053806565,0.05000,3.901
33,self_attn.v_proj,0.0256433210,0.05000,4.733
33,self_attn.q_proj,0.0648465085,0.05000,4.771
33,self_attn.k_proj,0.0127558404,0.05000,4.786
33,self_attn.o_proj,0.0010363033,0.05000,1.516
33,mlp.gate_proj,0.0097379385,0.05000,2.270
33,mlp.up_proj,0.0115533778,0.05000,2.308
33,mlp.down_proj,0.0071556654,0.05000,3.931
34,self_attn.q_proj,0.0531877879,0.05000,4.851
34,self_attn.k_proj,0.0115072968,0.05000,4.920
34,self_attn.v_proj,0.0209449343,0.05000,4.921
34,self_attn.o_proj,0.0028885654,0.05000,1.596
34,mlp.gate_proj,0.0124351588,0.05000,2.418
34,mlp.up_proj,0.0137425665,0.05000,2.451
34,mlp.down_proj,0.0108865213,0.05000,4.057
35,self_attn.q_proj,0.0295646570,0.05000,4.796
35,self_attn.v_proj,0.0100270835,0.05000,4.859
35,self_attn.k_proj,0.0073113912,0.05000,4.915
35,self_attn.o_proj,0.0038638669,0.05000,1.477
35,mlp.gate_proj,0.0334204830,0.05000,2.426
35,mlp.up_proj,0.0434612004,0.05000,2.442
35,mlp.down_proj,0.0230910449,0.05000,3.926