File size: 11,667 Bytes
316b026
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,damp,time
0,self_attn.v_proj,0.0000000281,0.05000,4.670
0,self_attn.k_proj,0.0000000335,0.05000,4.718
0,self_attn.q_proj,0.0000001225,0.05000,4.760
0,self_attn.o_proj,0.0000000638,0.05000,1.479
0,mlp.gate_proj,0.0000083850,0.05000,1.913
0,mlp.up_proj,0.0000072816,0.05000,1.916
0,mlp.down_proj,0.0000004270,0.05000,3.658
1,self_attn.v_proj,0.0000000562,0.05000,4.730
1,self_attn.q_proj,0.0000002102,0.05000,4.830
1,self_attn.k_proj,0.0000000580,0.05000,4.839
1,self_attn.o_proj,0.0000000563,0.05000,1.447
1,mlp.gate_proj,0.0002669175,0.05000,2.458
1,mlp.up_proj,0.0001192833,0.05000,2.516
1,mlp.down_proj,0.0000007279,0.05000,3.921
2,self_attn.v_proj,0.0000001416,0.05000,4.924
2,self_attn.q_proj,0.0000005442,0.05000,4.999
2,self_attn.k_proj,0.0000001599,0.05000,5.030
2,self_attn.o_proj,0.0000000774,0.05000,1.461
2,mlp.gate_proj,0.0004706195,0.05000,2.073
2,mlp.up_proj,0.0003679584,0.05000,2.095
2,mlp.down_proj,0.0000005296,0.05000,3.701
3,self_attn.v_proj,0.0000002583,0.05000,4.187
3,self_attn.k_proj,0.0000002787,0.05000,4.232
3,self_attn.q_proj,0.0000010549,0.05000,4.253
3,self_attn.o_proj,0.0000001789,0.05000,1.442
3,mlp.up_proj,0.0001859692,0.05000,1.984
3,mlp.gate_proj,0.0003880208,0.05000,2.003
3,mlp.down_proj,0.0000008290,0.05000,3.716
4,self_attn.v_proj,0.0000004453,0.05000,4.170
4,self_attn.q_proj,0.0000017503,0.05000,4.240
4,self_attn.k_proj,0.0000004929,0.05000,4.270
4,self_attn.o_proj,0.0000001841,0.05000,1.432
4,mlp.gate_proj,0.0003188560,0.05000,1.952
4,mlp.up_proj,0.0001058938,0.05000,1.977
4,mlp.down_proj,0.0000009877,0.05000,3.709
5,self_attn.v_proj,0.0000004747,0.05000,4.207
5,self_attn.k_proj,0.0000004808,0.05000,4.254
5,self_attn.q_proj,0.0000018170,0.05000,4.288
5,self_attn.o_proj,0.0000003358,0.05000,1.449
5,mlp.gate_proj,0.0001046843,0.05000,2.039
5,mlp.up_proj,0.0000414282,0.05000,2.070
5,mlp.down_proj,0.0000013782,0.05000,3.819
6,self_attn.v_proj,0.0000010176,0.05000,4.233
6,self_attn.q_proj,0.0000040303,0.05000,4.253
6,self_attn.k_proj,0.0000009895,0.05000,4.313
6,self_attn.o_proj,0.0000006622,0.05000,1.448
6,mlp.up_proj,0.0000503757,0.05000,2.014
6,mlp.gate_proj,0.0001110306,0.05000,2.035
6,mlp.down_proj,0.0000206670,0.05000,3.659
7,self_attn.k_proj,0.0000024329,0.05000,4.064
7,self_attn.q_proj,0.0000085931,0.05000,4.123
7,self_attn.v_proj,0.0000019982,0.05000,4.144
7,self_attn.o_proj,0.0000009256,0.05000,1.444
7,mlp.gate_proj,0.0001217811,0.05000,2.071
7,mlp.up_proj,0.0000577930,0.05000,2.092
7,mlp.down_proj,0.0000025102,0.05000,3.710
8,self_attn.v_proj,0.0000027738,0.05000,4.201
8,self_attn.q_proj,0.0000114360,0.05000,4.238
8,self_attn.k_proj,0.0000030762,0.05000,4.259
8,self_attn.o_proj,0.0000009674,0.05000,1.412
8,mlp.gate_proj,0.0000708923,0.05000,2.010
8,mlp.up_proj,0.0000563776,0.05000,2.033
8,mlp.down_proj,0.0000032682,0.05000,3.754
9,self_attn.v_proj,0.0000035083,0.05000,4.188
9,self_attn.k_proj,0.0000047883,0.05000,4.223
9,self_attn.q_proj,0.0000157103,0.05000,4.262
9,self_attn.o_proj,0.0000014919,0.05000,1.426
9,mlp.up_proj,0.0000711674,0.05000,2.011
9,mlp.gate_proj,0.0001187128,0.05000,2.031
9,mlp.down_proj,0.0000035517,0.05000,3.705
10,self_attn.k_proj,0.0000056646,0.05000,4.156
10,self_attn.q_proj,0.0000214887,0.05000,4.234
10,self_attn.v_proj,0.0000053920,0.05000,4.256
10,self_attn.o_proj,0.0000015159,0.05000,1.484
10,mlp.gate_proj,0.0000876355,0.05000,2.018
10,mlp.up_proj,0.0000582840,0.05000,2.034
10,mlp.down_proj,0.0000028648,0.05000,3.671
11,self_attn.k_proj,0.0000029401,0.05000,4.225
11,self_attn.v_proj,0.0000024203,0.05000,4.303
11,self_attn.q_proj,0.0000103068,0.05000,4.329
11,self_attn.o_proj,0.0000008888,0.05000,1.416
11,mlp.gate_proj,0.0000741166,0.05000,1.917
11,mlp.up_proj,0.0000566025,0.05000,1.941
11,mlp.down_proj,0.0000030860,0.05000,3.775
12,self_attn.k_proj,0.0000038472,0.05000,4.211
12,self_attn.q_proj,0.0000137759,0.05000,4.249
12,self_attn.v_proj,0.0000034272,0.05000,4.280
12,self_attn.o_proj,0.0000018546,0.05000,1.426
12,mlp.up_proj,0.0000580738,0.05000,2.071
12,mlp.gate_proj,0.0000685725,0.05000,2.094
12,mlp.down_proj,0.0000032841,0.05000,3.679
13,self_attn.v_proj,0.0000025200,0.05000,4.194
13,self_attn.q_proj,0.0000111981,0.05000,4.230
13,self_attn.k_proj,0.0000030891,0.05000,4.257
13,self_attn.o_proj,0.0000013833,0.05000,1.412
13,mlp.gate_proj,0.0000692568,0.05000,1.954
13,mlp.up_proj,0.0000647811,0.05000,1.973
13,mlp.down_proj,0.0000043834,0.05000,3.687
14,self_attn.q_proj,0.0000175752,0.05000,4.123
14,self_attn.k_proj,0.0000045994,0.05000,4.169
14,self_attn.v_proj,0.0000043383,0.05000,4.177
14,self_attn.o_proj,0.0000019435,0.05000,1.440
14,mlp.up_proj,0.0000720079,0.05000,1.933
14,mlp.gate_proj,0.0000766432,0.05000,1.958
14,mlp.down_proj,0.0000047673,0.05000,3.682
15,self_attn.k_proj,0.0000054351,0.05000,4.116
15,self_attn.v_proj,0.0000046581,0.05000,4.200
15,self_attn.q_proj,0.0000211606,0.05000,4.223
15,self_attn.o_proj,0.0000018997,0.05000,1.429
15,mlp.gate_proj,0.0000754266,0.05000,1.990
15,mlp.up_proj,0.0000740122,0.05000,2.015
15,mlp.down_proj,0.0000053597,0.05000,3.737
16,self_attn.q_proj,0.0000386672,0.05000,4.067
16,self_attn.k_proj,0.0000093139,0.05000,4.130
16,self_attn.v_proj,0.0000103499,0.05000,4.155
16,self_attn.o_proj,0.0000026699,0.05000,1.403
16,mlp.up_proj,0.0000855751,0.05000,2.041
16,mlp.gate_proj,0.0000930978,0.05000,2.061
16,mlp.down_proj,0.0000275581,0.05000,3.699
17,self_attn.q_proj,0.0000392438,0.05000,4.191
17,self_attn.k_proj,0.0000084165,0.05000,4.234
17,self_attn.v_proj,0.0000087300,0.05000,4.253
17,self_attn.o_proj,0.0000026301,0.05000,1.434
17,mlp.gate_proj,0.0000779856,0.05000,2.005
17,mlp.up_proj,0.0000724824,0.05000,2.024
17,mlp.down_proj,0.0000052021,0.05000,3.706
18,self_attn.q_proj,0.0000359746,0.05000,4.202
18,self_attn.v_proj,0.0000089155,0.05000,4.239
18,self_attn.k_proj,0.0000086172,0.05000,4.263
18,self_attn.o_proj,0.0000026857,0.05000,1.403
18,mlp.gate_proj,0.0000826436,0.05000,1.962
18,mlp.up_proj,0.0000786649,0.05000,1.977
18,mlp.down_proj,0.0000061125,0.05000,3.797
19,self_attn.k_proj,0.0000154321,0.05000,4.153
19,self_attn.v_proj,0.0000145288,0.05000,4.226
19,self_attn.q_proj,0.0000669893,0.05000,4.257
19,self_attn.o_proj,0.0000072475,0.05000,1.420
19,mlp.up_proj,0.0000763673,0.05000,1.958
19,mlp.gate_proj,0.0000795653,0.05000,1.983
19,mlp.down_proj,0.0000063999,0.05000,3.722
20,self_attn.k_proj,0.0000139938,0.05000,4.166
20,self_attn.v_proj,0.0000126008,0.05000,4.255
20,self_attn.q_proj,0.0000648620,0.05000,4.270
20,self_attn.o_proj,0.0000058633,0.05000,1.471
20,mlp.gate_proj,0.0000927177,0.05000,1.983
20,mlp.up_proj,0.0000909301,0.05000,2.005
20,mlp.down_proj,0.0000085226,0.05000,3.687
21,self_attn.v_proj,0.0000157023,0.05000,4.200
21,self_attn.q_proj,0.0000732686,0.05000,4.246
21,self_attn.k_proj,0.0000162576,0.05000,4.279
21,self_attn.o_proj,0.0000064938,0.05000,1.451
21,mlp.up_proj,0.0000938207,0.05000,2.061
21,mlp.gate_proj,0.0000953118,0.05000,2.069
21,mlp.down_proj,0.0000094180,0.05000,3.670
22,self_attn.v_proj,0.0000290647,0.05000,4.262
22,self_attn.q_proj,0.0001376551,0.05000,4.291
22,self_attn.k_proj,0.0000316722,0.05000,4.321
22,self_attn.o_proj,0.0000139498,0.05000,1.408
22,mlp.up_proj,0.0001028317,0.05000,1.960
22,mlp.gate_proj,0.0001130452,0.05000,1.982
22,mlp.down_proj,0.0000150741,0.05000,3.654
23,self_attn.q_proj,0.0001271593,0.05000,4.130
23,self_attn.k_proj,0.0000288629,0.05000,4.167
23,self_attn.v_proj,0.0000286624,0.05000,4.196
23,self_attn.o_proj,0.0000121074,0.05000,1.435
23,mlp.up_proj,0.0001123853,0.05000,1.978
23,mlp.gate_proj,0.0001318009,0.05000,2.002
23,mlp.down_proj,0.0000158443,0.05000,3.680
24,self_attn.k_proj,0.0000445231,0.05000,4.180
24,self_attn.v_proj,0.0000517156,0.05000,4.218
24,self_attn.q_proj,0.0001876024,0.05000,4.237
24,self_attn.o_proj,0.0000197756,0.05000,1.443
24,mlp.gate_proj,0.0001390896,0.05000,2.028
24,mlp.up_proj,0.0001163446,0.05000,2.052
24,mlp.down_proj,0.0000183784,0.05000,3.721
25,self_attn.k_proj,0.0000309183,0.05000,4.098
25,self_attn.v_proj,0.0000312976,0.05000,4.159
25,self_attn.q_proj,0.0001220958,0.05000,4.188
25,self_attn.o_proj,0.0000078953,0.05000,1.390
25,mlp.gate_proj,0.0001551162,0.05000,1.971
25,mlp.up_proj,0.0001274080,0.05000,1.993
25,mlp.down_proj,0.0000235486,0.05000,3.673
26,self_attn.v_proj,0.0000490624,0.05000,4.113
26,self_attn.k_proj,0.0000464167,0.05000,4.161
26,self_attn.q_proj,0.0001926069,0.05000,4.185
26,self_attn.o_proj,0.0000066098,0.05000,1.387
26,mlp.gate_proj,0.0001770693,0.05000,1.976
26,mlp.up_proj,0.0001508838,0.05000,1.975
26,mlp.down_proj,0.0000279487,0.05000,3.647
27,self_attn.k_proj,0.0000556290,0.05000,4.077
27,self_attn.q_proj,0.0002357567,0.05000,4.141
27,self_attn.v_proj,0.0000654981,0.05000,4.174
27,self_attn.o_proj,0.0000107555,0.05000,1.416
27,mlp.gate_proj,0.0001936240,0.05000,1.989
27,mlp.up_proj,0.0001708205,0.05000,2.008
27,mlp.down_proj,0.0000438858,0.05000,3.659
28,self_attn.k_proj,0.0000705007,0.05000,4.196
28,self_attn.v_proj,0.0000715719,0.05000,4.249
28,self_attn.q_proj,0.0002718625,0.05000,4.266
28,self_attn.o_proj,0.0000191249,0.05000,1.394
28,mlp.gate_proj,0.0002182023,0.05000,2.015
28,mlp.up_proj,0.0002019346,0.05000,2.031
28,mlp.down_proj,0.0000732407,0.05000,3.797
29,self_attn.k_proj,0.0001816301,0.05000,4.092
29,self_attn.q_proj,0.0007383766,0.05000,4.169
29,self_attn.v_proj,0.0002443326,0.05000,4.195
29,self_attn.o_proj,0.0000448207,0.05000,1.481
29,mlp.up_proj,0.0002562076,0.05000,1.942
29,mlp.gate_proj,0.0002613911,0.05000,1.958
29,mlp.down_proj,0.0000526463,0.05000,3.700
30,self_attn.v_proj,0.0002320336,0.05000,4.041
30,self_attn.k_proj,0.0001986943,0.05000,4.134
30,self_attn.q_proj,0.0007668271,0.05000,4.142
30,self_attn.o_proj,0.0000478573,0.05000,1.459
30,mlp.gate_proj,0.0002683302,0.05000,1.898
30,mlp.up_proj,0.0002727380,0.05000,1.920
30,mlp.down_proj,0.0000663494,0.05000,3.662
31,self_attn.q_proj,0.0010675234,0.05000,4.039
31,self_attn.k_proj,0.0003015756,0.05000,4.116
31,self_attn.v_proj,0.0003918752,0.05000,4.162
31,self_attn.o_proj,0.0000612622,0.05000,1.394
31,mlp.up_proj,0.0002870251,0.05000,2.027
31,mlp.gate_proj,0.0002682364,0.05000,2.032
31,mlp.down_proj,0.0000840589,0.05000,3.717
32,self_attn.q_proj,0.0016584295,0.05000,4.132
32,self_attn.v_proj,0.0006243234,0.05000,4.233
32,self_attn.k_proj,0.0004182623,0.05000,4.253
32,self_attn.o_proj,0.0001057172,0.05000,1.428
32,mlp.up_proj,0.0003151746,0.05000,1.983
32,mlp.gate_proj,0.0002878729,0.05000,2.006
32,mlp.down_proj,0.0001186975,0.05000,3.678
33,self_attn.v_proj,0.0016297800,0.05000,4.071
33,self_attn.k_proj,0.0008791048,0.05000,4.100
33,self_attn.q_proj,0.0041213692,0.05000,4.145
33,self_attn.o_proj,0.0001997127,0.05000,1.434
33,mlp.gate_proj,0.0003185476,0.05000,1.865
33,mlp.up_proj,0.0003529478,0.05000,1.881
33,mlp.down_proj,0.0001623388,0.05000,3.669
34,self_attn.q_proj,0.0027944569,0.05000,4.110
34,self_attn.k_proj,0.0006274358,0.05000,4.194
34,self_attn.v_proj,0.0009633384,0.05000,4.209
34,self_attn.o_proj,0.0002593519,0.05000,1.436
34,mlp.up_proj,0.0004195679,0.05000,1.947
34,mlp.gate_proj,0.0004068270,0.05000,1.970
34,mlp.down_proj,0.0003025797,0.05000,3.683
35,self_attn.v_proj,0.0003734207,0.05000,4.091
35,self_attn.k_proj,0.0003153036,0.05000,4.159
35,self_attn.q_proj,0.0011829730,0.05000,4.173
35,self_attn.o_proj,0.0001886140,0.05000,1.407
35,mlp.gate_proj,0.0009205786,0.05000,1.941
35,mlp.up_proj,0.0009714007,0.05000,1.939
35,mlp.down_proj,0.0008959190,0.05000,3.653