File size: 11,667 Bytes
217a67f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0000001153,0.05000,5.049
0,self_attn.k_proj,0.0000000314,0.05000,5.046
0,self_attn.v_proj,0.0000000264,0.05000,4.904
0,self_attn.o_proj,0.0000000573,0.05000,1.352
0,mlp.up_proj,0.0000071981,0.05000,2.367
0,mlp.gate_proj,0.0000083000,0.05000,2.394
0,mlp.down_proj,0.0000004238,0.05000,3.637
1,self_attn.v_proj,0.0000000536,0.05000,4.725
1,self_attn.q_proj,0.0000002012,0.05000,4.760
1,self_attn.k_proj,0.0000000553,0.05000,4.801
1,self_attn.o_proj,0.0000000548,0.05000,1.421
1,mlp.gate_proj,0.0002712732,0.05000,2.455
1,mlp.up_proj,0.0001199079,0.05000,2.521
1,mlp.down_proj,0.0000006759,0.05000,3.629
2,self_attn.q_proj,0.0000005233,0.05000,4.418
2,self_attn.k_proj,0.0000001536,0.05000,4.511
2,self_attn.v_proj,0.0000001353,0.05000,4.519
2,self_attn.o_proj,0.0000000725,0.05000,1.336
2,mlp.up_proj,0.0003696989,0.05000,1.855
2,mlp.gate_proj,0.0004766503,0.05000,1.883
2,mlp.down_proj,0.0000005235,0.05000,3.528
3,self_attn.k_proj,0.0000002669,0.05000,4.047
3,self_attn.q_proj,0.0000010149,0.05000,4.106
3,self_attn.v_proj,0.0000002467,0.05000,4.133
3,self_attn.o_proj,0.0000001659,0.05000,1.379
3,mlp.up_proj,0.0001861880,0.05000,1.868
3,mlp.gate_proj,0.0003944701,0.05000,1.886
3,mlp.down_proj,0.0000007631,0.05000,3.533
4,self_attn.v_proj,0.0000004171,0.05000,4.027
4,self_attn.k_proj,0.0000004655,0.05000,4.080
4,self_attn.q_proj,0.0000016490,0.05000,4.107
4,self_attn.o_proj,0.0000001532,0.05000,1.332
4,mlp.gate_proj,0.0003126145,0.05000,1.964
4,mlp.up_proj,0.0001026281,0.05000,1.966
4,mlp.down_proj,0.0000008822,0.05000,3.620
5,self_attn.q_proj,0.0000017115,0.05000,3.967
5,self_attn.k_proj,0.0000004532,0.05000,4.103
5,self_attn.v_proj,0.0000004463,0.05000,4.119
5,self_attn.o_proj,0.0000002963,0.05000,1.334
5,mlp.up_proj,0.0000387881,0.05000,1.993
5,mlp.gate_proj,0.0000990348,0.05000,2.011
5,mlp.down_proj,0.0000012321,0.05000,3.554
6,self_attn.k_proj,0.0000009316,0.05000,4.038
6,self_attn.v_proj,0.0000009560,0.05000,4.090
6,self_attn.q_proj,0.0000037971,0.05000,4.108
6,self_attn.o_proj,0.0000006054,0.05000,1.350
6,mlp.gate_proj,0.0001076263,0.05000,1.906
6,mlp.up_proj,0.0000480459,0.05000,1.908
6,mlp.down_proj,0.0000274052,0.05000,3.514
7,self_attn.v_proj,0.0000018685,0.05000,3.939
7,self_attn.k_proj,0.0000022698,0.05000,4.055
7,self_attn.q_proj,0.0000079988,0.05000,4.084
7,self_attn.o_proj,0.0000008302,0.05000,1.393
7,mlp.up_proj,0.0000536222,0.05000,1.848
7,mlp.gate_proj,0.0001139974,0.05000,1.864
7,mlp.down_proj,0.0000020956,0.05000,3.518
8,self_attn.q_proj,0.0000104191,0.05000,4.057
8,self_attn.k_proj,0.0000028003,0.05000,4.107
8,self_attn.v_proj,0.0000025009,0.05000,4.129
8,self_attn.o_proj,0.0000008218,0.05000,1.354
8,mlp.gate_proj,0.0000628367,0.05000,1.945
8,mlp.up_proj,0.0000496221,0.05000,1.967
8,mlp.down_proj,0.0000027113,0.05000,3.601
9,self_attn.q_proj,0.0000145289,0.05000,4.056
9,self_attn.v_proj,0.0000031755,0.05000,4.076
9,self_attn.k_proj,0.0000043873,0.05000,4.110
9,self_attn.o_proj,0.0000013182,0.05000,1.376
9,mlp.gate_proj,0.0001058348,0.05000,1.904
9,mlp.up_proj,0.0000626613,0.05000,1.927
9,mlp.down_proj,0.0000029618,0.05000,3.480
10,self_attn.v_proj,0.0000047305,0.05000,4.056
10,self_attn.k_proj,0.0000051334,0.05000,4.069
10,self_attn.q_proj,0.0000193386,0.05000,4.110
10,self_attn.o_proj,0.0000011543,0.05000,1.348
10,mlp.up_proj,0.0000511433,0.05000,1.859
10,mlp.gate_proj,0.0000775700,0.05000,1.882
10,mlp.down_proj,0.0000024109,0.05000,3.514
11,self_attn.q_proj,0.0000095244,0.05000,4.037
11,self_attn.k_proj,0.0000027274,0.05000,4.079
11,self_attn.v_proj,0.0000021945,0.05000,4.111
11,self_attn.o_proj,0.0000007554,0.05000,1.336
11,mlp.gate_proj,0.0000644956,0.05000,1.840
11,mlp.up_proj,0.0000488933,0.05000,1.859
11,mlp.down_proj,0.0000025150,0.05000,3.515
12,self_attn.v_proj,0.0000031391,0.05000,3.958
12,self_attn.k_proj,0.0000035763,0.05000,4.089
12,self_attn.q_proj,0.0000127543,0.05000,4.105
12,self_attn.o_proj,0.0000016265,0.05000,1.372
12,mlp.gate_proj,0.0000602618,0.05000,1.913
12,mlp.up_proj,0.0000506296,0.05000,1.939
12,mlp.down_proj,0.0000027434,0.05000,3.544
13,self_attn.q_proj,0.0000102629,0.05000,3.938
13,self_attn.k_proj,0.0000028292,0.05000,4.006
13,self_attn.v_proj,0.0000023032,0.05000,4.034
13,self_attn.o_proj,0.0000012708,0.05000,1.333
13,mlp.gate_proj,0.0000599222,0.05000,1.867
13,mlp.up_proj,0.0000557118,0.05000,1.906
13,mlp.down_proj,0.0000036174,0.05000,3.530
14,self_attn.v_proj,0.0000038112,0.05000,3.993
14,self_attn.k_proj,0.0000041284,0.05000,4.062
14,self_attn.q_proj,0.0000156123,0.05000,4.090
14,self_attn.o_proj,0.0000016136,0.05000,1.422
14,mlp.up_proj,0.0000593381,0.05000,1.871
14,mlp.gate_proj,0.0000639253,0.05000,1.898
14,mlp.down_proj,0.0000036098,0.05000,3.527
15,self_attn.q_proj,0.0000175619,0.05000,4.078
15,self_attn.k_proj,0.0000045820,0.05000,4.136
15,self_attn.v_proj,0.0000038347,0.05000,4.163
15,self_attn.o_proj,0.0000016670,0.05000,1.349
15,mlp.gate_proj,0.0000615278,0.05000,1.954
15,mlp.up_proj,0.0000598616,0.05000,1.972
15,mlp.down_proj,0.0000040227,0.05000,3.588
16,self_attn.v_proj,0.0000083706,0.05000,4.065
16,self_attn.k_proj,0.0000076236,0.05000,4.114
16,self_attn.q_proj,0.0000314463,0.05000,4.151
16,self_attn.o_proj,0.0000021270,0.05000,1.349
16,mlp.gate_proj,0.0000710921,0.05000,1.947
16,mlp.up_proj,0.0000644939,0.05000,1.966
16,mlp.down_proj,0.0000117242,0.05000,3.546
17,self_attn.k_proj,0.0000067831,0.05000,4.004
17,self_attn.q_proj,0.0000312820,0.05000,4.069
17,self_attn.v_proj,0.0000068218,0.05000,4.087
17,self_attn.o_proj,0.0000022641,0.05000,1.352
17,mlp.up_proj,0.0000593657,0.05000,1.981
17,mlp.gate_proj,0.0000645823,0.05000,1.997
17,mlp.down_proj,0.0000040265,0.05000,3.558
18,self_attn.k_proj,0.0000076790,0.05000,4.045
18,self_attn.q_proj,0.0000317306,0.05000,4.091
18,self_attn.v_proj,0.0000078188,0.05000,4.120
18,self_attn.o_proj,0.0000024979,0.05000,1.353
18,mlp.gate_proj,0.0000683760,0.05000,1.927
18,mlp.up_proj,0.0000643242,0.05000,1.943
18,mlp.down_proj,0.0000048352,0.05000,3.559
19,self_attn.v_proj,0.0000123465,0.05000,4.063
19,self_attn.q_proj,0.0000572430,0.05000,4.142
19,self_attn.k_proj,0.0000133796,0.05000,4.159
19,self_attn.o_proj,0.0000057088,0.05000,1.345
19,mlp.gate_proj,0.0000665518,0.05000,1.862
19,mlp.up_proj,0.0000632745,0.05000,1.862
19,mlp.down_proj,0.0000050105,0.05000,3.549
20,self_attn.k_proj,0.0000115854,0.05000,4.079
20,self_attn.q_proj,0.0000533304,0.05000,4.150
20,self_attn.v_proj,0.0000104790,0.05000,4.157
20,self_attn.o_proj,0.0000044671,0.05000,1.360
20,mlp.up_proj,0.0000710240,0.05000,1.917
20,mlp.gate_proj,0.0000730992,0.05000,1.937
20,mlp.down_proj,0.0000056272,0.05000,3.524
21,self_attn.k_proj,0.0000134538,0.05000,4.045
21,self_attn.v_proj,0.0000129916,0.05000,4.093
21,self_attn.q_proj,0.0000596051,0.05000,4.095
21,self_attn.o_proj,0.0000049458,0.05000,1.420
21,mlp.up_proj,0.0000742268,0.05000,1.945
21,mlp.gate_proj,0.0000773761,0.05000,1.968
21,mlp.down_proj,0.0000066130,0.05000,3.565
22,self_attn.q_proj,0.0001142109,0.05000,4.051
22,self_attn.k_proj,0.0000264698,0.05000,4.105
22,self_attn.v_proj,0.0000240881,0.05000,4.129
22,self_attn.o_proj,0.0000098465,0.05000,1.356
22,mlp.up_proj,0.0000840972,0.05000,1.850
22,mlp.gate_proj,0.0000945803,0.05000,1.859
22,mlp.down_proj,0.0000115707,0.05000,3.587
23,self_attn.q_proj,0.0001076663,0.05000,4.072
23,self_attn.v_proj,0.0000245380,0.05000,4.140
23,self_attn.k_proj,0.0000248362,0.05000,4.156
23,self_attn.o_proj,0.0000091614,0.05000,1.375
23,mlp.up_proj,0.0001068179,0.05000,1.993
23,mlp.gate_proj,0.0001266348,0.05000,2.014
23,mlp.down_proj,0.0000158050,0.05000,3.526
24,self_attn.v_proj,0.0000519260,0.05000,4.015
24,self_attn.q_proj,0.0001842994,0.05000,4.079
24,self_attn.k_proj,0.0000439712,0.05000,4.112
24,self_attn.o_proj,0.0000171693,0.05000,1.334
24,mlp.gate_proj,0.0001448973,0.05000,1.835
24,mlp.up_proj,0.0001207576,0.05000,1.858
24,mlp.down_proj,0.0000206206,0.05000,3.523
25,self_attn.q_proj,0.0001284047,0.05000,3.922
25,self_attn.k_proj,0.0000323777,0.05000,4.020
25,self_attn.v_proj,0.0000331042,0.05000,4.052
25,self_attn.o_proj,0.0000070419,0.05000,1.324
25,mlp.up_proj,0.0001367776,0.05000,1.950
25,mlp.gate_proj,0.0001674050,0.05000,1.948
25,mlp.down_proj,0.0000251580,0.05000,3.574
26,self_attn.v_proj,0.0000526688,0.05000,4.112
26,self_attn.q_proj,0.0002068482,0.05000,4.155
26,self_attn.k_proj,0.0000496134,0.05000,4.170
26,self_attn.o_proj,0.0000051848,0.05000,1.350
26,mlp.gate_proj,0.0001928277,0.05000,1.937
26,mlp.up_proj,0.0001641577,0.05000,1.944
26,mlp.down_proj,0.0000290895,0.05000,3.588
27,self_attn.v_proj,0.0000689536,0.05000,4.199
27,self_attn.q_proj,0.0002489993,0.05000,4.245
27,self_attn.k_proj,0.0000588823,0.05000,4.284
27,self_attn.o_proj,0.0000074859,0.05000,1.371
27,mlp.gate_proj,0.0002114797,0.05000,1.897
27,mlp.up_proj,0.0001867100,0.05000,1.921
27,mlp.down_proj,0.0000425167,0.05000,3.522
28,self_attn.v_proj,0.0000753414,0.05000,4.083
28,self_attn.k_proj,0.0000747432,0.05000,4.138
28,self_attn.q_proj,0.0002869514,0.05000,4.171
28,self_attn.o_proj,0.0000164335,0.05000,1.460
28,mlp.gate_proj,0.0002405842,0.05000,1.938
28,mlp.up_proj,0.0002226133,0.05000,1.956
28,mlp.down_proj,0.0000668795,0.05000,3.585
29,self_attn.q_proj,0.0007534960,0.05000,3.975
29,self_attn.k_proj,0.0001845237,0.05000,4.050
29,self_attn.v_proj,0.0002504832,0.05000,4.091
29,self_attn.o_proj,0.0000320753,0.05000,1.371
29,mlp.gate_proj,0.0002801606,0.05000,1.924
29,mlp.up_proj,0.0002734993,0.05000,1.944
29,mlp.down_proj,0.0000491190,0.05000,3.589
30,self_attn.k_proj,0.0002038838,0.05000,4.011
30,self_attn.v_proj,0.0002371485,0.05000,4.099
30,self_attn.q_proj,0.0007788552,0.05000,4.139
30,self_attn.o_proj,0.0000364104,0.05000,1.333
30,mlp.up_proj,0.0002901775,0.05000,1.967
30,mlp.gate_proj,0.0002870724,0.05000,1.969
30,mlp.down_proj,0.0000643464,0.05000,3.535
31,self_attn.k_proj,0.0003022571,0.05000,4.016
31,self_attn.q_proj,0.0010716498,0.05000,4.085
31,self_attn.v_proj,0.0003914933,0.05000,4.107
31,self_attn.o_proj,0.0000348235,0.05000,1.387
31,mlp.gate_proj,0.0002821420,0.05000,1.868
31,mlp.up_proj,0.0003007668,0.05000,1.885
31,mlp.down_proj,0.0000811069,0.05000,3.580
32,self_attn.v_proj,0.0006320391,0.05000,4.059
32,self_attn.k_proj,0.0004232653,0.05000,4.098
32,self_attn.q_proj,0.0016716093,0.05000,4.124
32,self_attn.o_proj,0.0000694807,0.05000,1.362
32,mlp.up_proj,0.0003274428,0.05000,1.839
32,mlp.gate_proj,0.0002995185,0.05000,1.859
32,mlp.down_proj,0.0001096588,0.05000,3.546
33,self_attn.q_proj,0.0040160338,0.05000,4.085
33,self_attn.v_proj,0.0015813031,0.05000,4.130
33,self_attn.k_proj,0.0008651940,0.05000,4.169
33,self_attn.o_proj,0.0001325059,0.05000,1.345
33,mlp.up_proj,0.0003519209,0.05000,1.878
33,mlp.gate_proj,0.0003186744,0.05000,1.903
33,mlp.down_proj,0.0001409312,0.05000,3.574
34,self_attn.v_proj,0.0009027103,0.05000,4.124
34,self_attn.q_proj,0.0026703046,0.05000,4.195
34,self_attn.k_proj,0.0005990280,0.05000,4.223
34,self_attn.o_proj,0.0001643766,0.05000,1.372
34,mlp.up_proj,0.0003952665,0.05000,1.916
34,mlp.gate_proj,0.0003853188,0.05000,1.932
34,mlp.down_proj,0.0002338718,0.05000,3.562
35,self_attn.v_proj,0.0003357541,0.05000,3.973
35,self_attn.q_proj,0.0010748606,0.05000,4.043
35,self_attn.k_proj,0.0002876785,0.05000,4.094
35,self_attn.o_proj,0.0001355600,0.05000,1.385
35,mlp.gate_proj,0.0008457840,0.05000,1.880
35,mlp.up_proj,0.0008631636,0.05000,1.895
35,mlp.down_proj,0.0006807271,0.05000,3.547