File size: 10,367 Bytes
6cd907c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
layer,module,loss,samples,damp,time
0,mlp.gate_proj,0.0002078072,0.05000,3.973
0,mlp.up_proj,0.0002951170,0.05000,3.987
0,mlp.down_proj,0.0001080021,0.05000,7.187
0,self_attn.q_proj,0.0002751954,0.05000,5.447
0,self_attn.v_proj,0.0000291499,0.05000,5.453
0,self_attn.o_proj,0.0001638678,0.05000,5.458
0,self_attn.k_proj,0.0000131150,0.05000,5.470
1,mlp.gate_proj,0.0002358835,0.05000,3.468
1,mlp.up_proj,0.0002490052,0.05000,3.477
1,mlp.down_proj,0.0000623201,0.05000,6.719
1,self_attn.k_proj,0.0021598425,0.05000,5.184
1,self_attn.o_proj,0.0000640641,0.05000,5.191
1,self_attn.v_proj,0.0027478211,0.05000,5.198
1,self_attn.q_proj,0.0021248550,0.05000,5.201
2,mlp.up_proj,0.0002725317,0.05000,3.898
2,mlp.gate_proj,0.0002710722,0.05000,3.913
2,mlp.down_proj,0.0000934939,0.05000,6.735
2,self_attn.q_proj,0.0013359398,0.05000,5.192
2,self_attn.k_proj,0.0013283169,0.05000,5.196
2,self_attn.v_proj,0.0015758836,0.05000,5.199
2,self_attn.o_proj,0.0000310727,0.05000,5.206
3,mlp.gate_proj,0.0005309442,0.05000,3.730
3,mlp.up_proj,0.0004692136,0.05000,3.741
3,mlp.down_proj,0.0001151627,0.05000,6.528
3,self_attn.k_proj,0.0015043542,0.05000,5.050
3,self_attn.q_proj,0.0014609831,0.05000,5.212
3,self_attn.o_proj,0.0000424297,0.05000,5.217
3,self_attn.v_proj,0.0015774138,0.05000,5.225
4,mlp.gate_proj,0.0006711201,0.05000,3.650
4,mlp.up_proj,0.0006097663,0.05000,3.668
4,mlp.down_proj,0.0001310055,0.05000,6.790
4,self_attn.k_proj,0.0011175845,0.05000,5.169
4,self_attn.o_proj,0.0000302304,0.05000,5.176
4,self_attn.v_proj,0.0013201939,0.05000,5.178
4,self_attn.q_proj,0.0010903492,0.05000,5.183
5,mlp.gate_proj,0.0008319342,0.05000,3.374
5,mlp.up_proj,0.0007571966,0.05000,3.555
5,mlp.down_proj,0.0001388114,0.05000,6.471
5,self_attn.o_proj,0.0000473348,0.05000,5.101
5,self_attn.k_proj,0.0012779406,0.05000,5.275
5,self_attn.q_proj,0.0012068739,0.05000,5.281
5,self_attn.v_proj,0.0013603305,0.05000,5.284
6,mlp.gate_proj,0.0010110473,0.05000,3.886
6,mlp.up_proj,0.0009320581,0.05000,3.909
6,mlp.down_proj,0.0001813578,0.05000,7.282
6,self_attn.o_proj,0.0000507709,0.05000,5.565
6,self_attn.q_proj,0.0011634061,0.05000,5.583
6,self_attn.k_proj,0.0014005803,0.05000,5.584
6,self_attn.v_proj,0.0012214420,0.05000,5.588
7,mlp.gate_proj,0.0010606032,0.05000,3.618
7,mlp.up_proj,0.0009992647,0.05000,3.634
7,mlp.down_proj,0.0002215556,0.05000,6.559
7,self_attn.v_proj,0.0011570467,0.05000,5.247
7,self_attn.o_proj,0.0000582520,0.05000,5.254
7,self_attn.k_proj,0.0012879103,0.05000,5.258
7,self_attn.q_proj,0.0010551224,0.05000,5.269
8,mlp.gate_proj,0.0011260961,0.05000,3.717
8,mlp.up_proj,0.0010257109,0.05000,3.724
8,mlp.down_proj,0.0002213782,0.05000,6.778
8,self_attn.q_proj,0.0007443796,0.05000,5.545
8,self_attn.o_proj,0.0000838215,0.05000,5.553
8,self_attn.k_proj,0.0009184965,0.05000,5.581
8,self_attn.v_proj,0.0008096808,0.05000,5.590
9,mlp.up_proj,0.0011578900,0.05000,4.170
9,mlp.gate_proj,0.0012150619,0.05000,4.172
9,mlp.down_proj,0.0002476013,0.05000,7.254
9,self_attn.v_proj,0.0000515016,0.05000,5.568
9,self_attn.o_proj,0.0000509674,0.05000,5.584
9,self_attn.k_proj,0.0000803375,0.05000,5.585
9,self_attn.q_proj,0.0008326407,0.05000,5.593
10,mlp.up_proj,0.0011782159,0.05000,4.128
10,mlp.gate_proj,0.0012158477,0.05000,4.132
10,mlp.down_proj,0.0002546134,0.05000,7.238
10,self_attn.v_proj,0.0011085082,0.05000,5.713
10,self_attn.q_proj,0.0010387903,0.05000,5.720
10,self_attn.o_proj,0.0000718013,0.05000,5.722
10,self_attn.k_proj,0.0013175182,0.05000,5.734
11,mlp.gate_proj,0.0011684080,0.05000,3.892
11,mlp.up_proj,0.0011704125,0.05000,3.898
11,mlp.down_proj,0.0002768170,0.05000,6.982
11,self_attn.o_proj,0.0000842501,0.05000,5.694
11,self_attn.q_proj,0.0007722052,0.05000,5.701
11,self_attn.k_proj,0.0009454727,0.05000,5.707
11,self_attn.v_proj,0.0008207958,0.05000,5.712
12,mlp.gate_proj,0.0010926098,0.05000,4.550
12,mlp.up_proj,0.0011160669,0.05000,4.566
12,mlp.down_proj,0.0002585554,0.05000,7.980
12,self_attn.k_proj,0.0010605023,0.05000,5.557
12,self_attn.o_proj,0.0000954078,0.05000,5.570
12,self_attn.v_proj,0.0011184107,0.05000,5.594
12,self_attn.q_proj,0.0009598745,0.05000,5.598
13,mlp.gate_proj,0.0010929265,0.05000,4.198
13,mlp.up_proj,0.0011373865,0.05000,4.201
13,mlp.down_proj,0.0002754575,0.05000,7.376
13,self_attn.o_proj,0.0001053596,0.05000,5.857
13,self_attn.v_proj,0.0007298129,0.05000,5.866
13,self_attn.k_proj,0.0009108803,0.05000,5.884
13,self_attn.q_proj,0.0007128777,0.05000,5.896
14,mlp.gate_proj,0.0010234204,0.05000,4.505
14,mlp.up_proj,0.0011231728,0.05000,4.527
14,mlp.down_proj,0.0002988453,0.05000,8.039
14,self_attn.q_proj,0.0007725519,0.05000,5.838
14,self_attn.k_proj,0.0008887160,0.05000,5.839
14,self_attn.o_proj,0.0001257012,0.05000,5.849
14,self_attn.v_proj,0.0007643019,0.05000,5.862
15,mlp.gate_proj,0.0010563693,0.05000,4.093
15,mlp.up_proj,0.0010843390,0.05000,4.112
15,mlp.down_proj,0.0002757835,0.05000,7.433
15,self_attn.o_proj,0.0001515282,0.05000,6.018
15,self_attn.k_proj,0.0006521820,0.05000,6.034
15,self_attn.v_proj,0.0005368739,0.05000,6.046
15,self_attn.q_proj,0.0005058027,0.05000,6.051
16,mlp.gate_proj,0.0012604057,0.05000,4.138
16,mlp.up_proj,0.0012323827,0.05000,4.160
16,mlp.down_proj,0.0002975532,0.05000,7.626
16,self_attn.k_proj,0.0000700808,0.05000,5.996
16,self_attn.q_proj,0.0007158842,0.05000,6.021
16,self_attn.v_proj,0.0000399206,0.05000,6.019
16,self_attn.o_proj,0.0001237841,0.05000,6.025
17,mlp.gate_proj,0.0015615467,0.05000,4.348
17,mlp.up_proj,0.0014232672,0.05000,4.384
17,mlp.down_proj,0.0003209173,0.05000,8.060
17,self_attn.k_proj,0.0000623059,0.05000,6.262
17,self_attn.v_proj,0.0000560221,0.05000,6.262
17,self_attn.o_proj,0.0001281610,0.05000,6.272
17,self_attn.q_proj,0.0007087458,0.05000,6.279
18,mlp.gate_proj,0.0014200928,0.05000,4.596
18,mlp.up_proj,0.0013662458,0.05000,4.619
18,mlp.down_proj,0.0003235204,0.05000,8.186
18,self_attn.k_proj,0.0009590061,0.05000,6.615
18,self_attn.o_proj,0.0001038908,0.05000,6.643
18,self_attn.v_proj,0.0006829601,0.05000,6.646
18,self_attn.q_proj,0.0006769612,0.05000,6.649
19,mlp.gate_proj,0.0013718377,0.05000,4.395
19,mlp.up_proj,0.0014014285,0.05000,4.661
19,mlp.down_proj,0.0003881076,0.05000,8.269
19,self_attn.k_proj,0.0011457390,0.05000,6.411
19,self_attn.q_proj,0.0008744106,0.05000,6.428
19,self_attn.o_proj,0.0001004489,0.05000,6.426
19,self_attn.v_proj,0.0009188803,0.05000,6.433
20,mlp.gate_proj,0.0013132144,0.05000,4.429
20,mlp.up_proj,0.0013869754,0.05000,4.452
20,mlp.down_proj,0.0004866157,0.05000,8.146
20,self_attn.k_proj,0.0009195520,0.05000,6.902
20,self_attn.o_proj,0.0001352315,0.05000,6.916
20,self_attn.q_proj,0.0006837781,0.05000,6.921
20,self_attn.v_proj,0.0006840548,0.05000,6.932
21,mlp.gate_proj,0.0011899443,0.05000,4.188
21,mlp.up_proj,0.0012786226,0.05000,4.204
21,mlp.down_proj,0.0005073890,0.05000,7.768
21,self_attn.o_proj,0.0002255129,0.05000,6.370
21,self_attn.k_proj,0.0007886602,0.05000,6.380
21,self_attn.q_proj,0.0006903726,0.05000,6.391
21,self_attn.v_proj,0.0007393014,0.05000,6.388
22,mlp.gate_proj,0.0013304233,0.05000,4.497
22,mlp.up_proj,0.0014272174,0.05000,4.509
22,mlp.down_proj,0.0006732390,0.05000,8.299
22,self_attn.k_proj,0.0000527559,0.05000,6.658
22,self_attn.o_proj,0.0002934376,0.05000,6.665
22,self_attn.v_proj,0.0000915629,0.05000,6.692
22,self_attn.q_proj,0.0008205932,0.05000,6.703
23,mlp.gate_proj,0.0016920954,0.05000,4.612
23,mlp.up_proj,0.0018172881,0.05000,4.613
23,mlp.down_proj,0.0009602727,0.05000,8.565
23,self_attn.o_proj,0.0002255482,0.05000,6.927
23,self_attn.k_proj,0.0010880575,0.05000,6.935
23,self_attn.q_proj,0.0008466790,0.05000,6.943
23,self_attn.v_proj,0.0008170104,0.05000,6.953
24,mlp.gate_proj,0.0019866878,0.05000,4.705
24,mlp.up_proj,0.0021496204,0.05000,4.735
24,mlp.down_proj,0.0016623680,0.05000,8.530
24,self_attn.k_proj,0.0009090479,0.05000,7.095
24,self_attn.o_proj,0.0003707870,0.05000,7.118
24,self_attn.q_proj,0.0007286620,0.05000,7.141
24,self_attn.v_proj,0.0007401827,0.05000,7.147
25,mlp.gate_proj,0.0024015805,0.05000,5.336
25,mlp.up_proj,0.0026404986,0.05000,5.350
25,mlp.down_proj,0.0034734985,0.05000,9.255
25,self_attn.o_proj,0.0006143702,0.05000,7.027
25,self_attn.v_proj,0.0007805540,0.05000,7.053
25,self_attn.q_proj,0.0007903687,0.05000,7.057
25,self_attn.k_proj,0.0010052844,0.05000,7.063
26,mlp.gate_proj,0.0027684612,0.05000,4.732
26,mlp.up_proj,0.0031322542,0.05000,4.748
26,mlp.down_proj,0.0031763896,0.05000,8.622
26,self_attn.k_proj,0.0019508587,0.05000,6.850
26,self_attn.q_proj,0.0013604624,0.05000,6.857
26,self_attn.v_proj,0.0013274833,0.05000,6.865
26,self_attn.o_proj,0.0008038559,0.05000,6.879
27,mlp.up_proj,0.0037468435,0.05000,5.283
27,mlp.gate_proj,0.0032518848,0.05000,5.293
27,mlp.down_proj,0.0044657355,0.05000,9.521
27,self_attn.o_proj,0.0006732306,0.05000,6.937
27,self_attn.v_proj,0.0011655967,0.05000,6.945
27,self_attn.q_proj,0.0012446561,0.05000,6.955
27,self_attn.k_proj,0.0013658012,0.05000,6.962
28,mlp.gate_proj,0.0037523353,0.05000,4.865
28,mlp.up_proj,0.0044277377,0.05000,4.887
28,mlp.down_proj,0.0062658060,0.05000,8.965
28,self_attn.k_proj,0.0021060843,0.05000,8.730
28,self_attn.v_proj,0.0015762776,0.05000,8.754
28,self_attn.o_proj,0.0016443902,0.05000,8.777
28,self_attn.q_proj,0.0015356449,0.05000,8.784
29,mlp.up_proj,0.0052878110,0.05000,5.066
29,mlp.gate_proj,0.0043512575,0.05000,5.077
29,mlp.down_proj,0.0094577564,0.05000,9.052
29,self_attn.v_proj,0.0008256575,0.05000,7.410
29,self_attn.o_proj,0.0014812295,0.05000,7.426
29,self_attn.k_proj,0.0000835427,0.05000,7.436
29,self_attn.q_proj,0.0016312229,0.05000,7.449
30,mlp.gate_proj,0.0052317987,0.05000,5.402
30,mlp.up_proj,0.0063166269,0.05000,5.429
30,mlp.down_proj,0.0177200560,0.05000,9.493
30,self_attn.q_proj,0.0023054092,0.05000,7.067
30,self_attn.k_proj,0.0000905518,0.05000,7.075
30,self_attn.v_proj,0.0023724051,0.05000,7.086
30,self_attn.o_proj,0.0039247042,0.05000,7.094
31,mlp.up_proj,0.0094467415,0.05000,4.988
31,mlp.gate_proj,0.0084637072,0.05000,5.021
31,mlp.down_proj,0.0731122261,0.05000,8.989
31,self_attn.o_proj,0.0030262335,0.05000,6.952
31,self_attn.v_proj,0.0004133850,0.05000,6.961
31,self_attn.q_proj,0.0017336883,0.05000,6.967
31,self_attn.k_proj,0.0000637945,0.05000,6.973