File size: 10,845 Bytes
f6f0b6f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
layer,module,loss,samples,damp,time
0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.138
0,self_attn.q_proj,0.0000025054,0.05000,1.933
0,self_attn.v_proj,0.0000002673,0.05000,1.967
0,self_attn.k_proj,0.0000001197,0.05000,1.976
0,self_attn.o_gate,0.0000053353,0.05000,0.394
0,mlp.up_proj,0.0000028122,0.05000,2.091
0,mlp.gate_proj,0.0000019878,0.05000,2.116
0,mlp.down_proj,0.0000014094,0.05000,3.193
1,self_attn.q_proj,0.0000198860,0.05000,2.327
1,self_attn.o_proj,0.0000005668,0.05000,2.348
1,self_attn.k_proj,0.0000202024,0.05000,2.356
1,self_attn.v_proj,0.0000256484,0.05000,2.360
1,mlp.up_proj,0.0000018768,0.05000,1.999
1,mlp.gate_proj,0.0000017809,0.05000,2.000
1,mlp.down_proj,0.0000005710,0.05000,3.093
2,self_attn.k_proj,0.0000100614,0.05000,2.109
2,self_attn.q_proj,0.0000101246,0.05000,2.183
2,self_attn.o_proj,0.0000002428,0.05000,2.218
2,self_attn.v_proj,0.0000119164,0.05000,2.239
2,mlp.gate_proj,0.0000022318,0.05000,1.951
2,mlp.up_proj,0.0000022416,0.05000,2.021
2,mlp.down_proj,0.0000009728,0.05000,3.080
3,self_attn.q_proj,0.0000125645,0.05000,2.153
3,self_attn.v_proj,0.0000135237,0.05000,2.170
3,self_attn.o_proj,0.0000003379,0.05000,2.181
3,self_attn.k_proj,0.0000128744,0.05000,2.214
3,mlp.gate_proj,0.0000045511,0.05000,1.978
3,mlp.up_proj,0.0000040209,0.05000,2.037
3,mlp.down_proj,0.0000014397,0.05000,3.115
4,self_attn.q_proj,0.0000099334,0.05000,2.707
4,self_attn.v_proj,0.0000120185,0.05000,2.720
4,self_attn.o_proj,0.0000002742,0.05000,2.739
4,self_attn.k_proj,0.0000101482,0.05000,2.742
4,mlp.up_proj,0.0000054010,0.05000,1.751
4,mlp.gate_proj,0.0000059484,0.05000,1.881
4,mlp.down_proj,0.0000015523,0.05000,2.930
5,self_attn.q_proj,0.0000107690,0.05000,2.388
5,self_attn.v_proj,0.0000121319,0.05000,2.565
5,self_attn.k_proj,0.0000113703,0.05000,2.571
5,self_attn.o_proj,0.0000004462,0.05000,2.596
5,mlp.gate_proj,0.0000073629,0.05000,2.027
5,mlp.up_proj,0.0000066999,0.05000,2.040
5,mlp.down_proj,0.0000016318,0.05000,3.135
6,self_attn.k_proj,0.0000124642,0.05000,2.605
6,self_attn.q_proj,0.0000104139,0.05000,2.671
6,self_attn.o_proj,0.0000004455,0.05000,2.673
6,self_attn.v_proj,0.0000109036,0.05000,2.687
6,mlp.up_proj,0.0000082335,0.05000,2.047
6,mlp.gate_proj,0.0000089300,0.05000,2.052
6,mlp.down_proj,0.0000018200,0.05000,3.152
7,self_attn.k_proj,0.0000113131,0.05000,2.759
7,self_attn.v_proj,0.0000101746,0.05000,2.777
7,self_attn.o_proj,0.0000004802,0.05000,2.778
7,self_attn.q_proj,0.0000092834,0.05000,2.784
7,mlp.up_proj,0.0000086123,0.05000,2.016
7,mlp.gate_proj,0.0000091405,0.05000,2.034
7,mlp.down_proj,0.0000021072,0.05000,3.119
8,self_attn.o_proj,0.0000007475,0.05000,2.422
8,self_attn.v_proj,0.0000070005,0.05000,2.433
8,self_attn.q_proj,0.0000064478,0.05000,2.495
8,self_attn.k_proj,0.0000079277,0.05000,2.502
8,mlp.up_proj,0.0000088850,0.05000,3.785
8,mlp.gate_proj,0.0000097536,0.05000,3.857
8,mlp.down_proj,0.0000020750,0.05000,6.063
9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.102
9,self_attn.q_proj,0.0000078441,0.05000,1.728
9,self_attn.k_proj,0.0000007585,0.05000,1.730
9,self_attn.v_proj,0.0000004848,0.05000,1.765
9,self_attn.o_gate,0.0000054848,0.05000,0.385
9,mlp.gate_proj,0.0000107061,0.05000,2.022
9,mlp.up_proj,0.0000102051,0.05000,2.036
9,mlp.down_proj,0.0000026359,0.05000,3.114
10,self_attn.o_proj,0.0000006941,0.05000,2.442
10,self_attn.k_proj,0.0000118222,0.05000,2.463
10,self_attn.q_proj,0.0000094431,0.05000,2.511
10,self_attn.v_proj,0.0000100697,0.05000,2.516
10,mlp.up_proj,0.0000102459,0.05000,1.732
10,mlp.gate_proj,0.0000105735,0.05000,1.865
10,mlp.down_proj,0.0000024634,0.05000,2.896
11,self_attn.q_proj,0.0000068717,0.05000,2.337
11,self_attn.o_proj,0.0000007806,0.05000,2.350
11,self_attn.v_proj,0.0000072992,0.05000,2.433
11,self_attn.k_proj,0.0000083771,0.05000,2.435
11,mlp.gate_proj,0.0000102914,0.05000,2.056
11,mlp.up_proj,0.0000103112,0.05000,2.066
11,mlp.down_proj,0.0000026183,0.05000,3.164
12,self_attn.k_proj,0.0000095307,0.05000,2.719
12,self_attn.o_proj,0.0000009991,0.05000,2.808
12,self_attn.q_proj,0.0000086598,0.05000,2.811
12,self_attn.v_proj,0.0000100696,0.05000,2.829
12,mlp.up_proj,0.0000101439,0.05000,2.039
12,mlp.gate_proj,0.0000099271,0.05000,2.045
12,mlp.down_proj,0.0000026657,0.05000,3.134
13,self_attn.o_proj,0.0000010317,0.05000,2.791
13,self_attn.q_proj,0.0000065795,0.05000,2.790
13,self_attn.v_proj,0.0000067338,0.05000,2.802
13,self_attn.k_proj,0.0000084106,0.05000,2.804
13,mlp.up_proj,0.0000104438,0.05000,2.054
13,mlp.gate_proj,0.0000100330,0.05000,2.062
13,mlp.down_proj,0.0000028064,0.05000,3.163
14,self_attn.k_proj,0.0000081929,0.05000,2.847
14,self_attn.o_proj,0.0000013027,0.05000,2.863
14,self_attn.q_proj,0.0000071310,0.05000,2.869
14,self_attn.v_proj,0.0000070400,0.05000,2.872
14,mlp.gate_proj,0.0000094866,0.05000,1.740
14,mlp.up_proj,0.0000104130,0.05000,1.846
14,mlp.down_proj,0.0000031477,0.05000,2.910
15,self_attn.o_proj,0.0000017505,0.05000,2.218
15,self_attn.q_proj,0.0000046234,0.05000,2.263
15,self_attn.v_proj,0.0000049011,0.05000,2.275
15,self_attn.k_proj,0.0000059547,0.05000,2.337
15,mlp.gate_proj,0.0000098456,0.05000,1.961
15,mlp.up_proj,0.0000101065,0.05000,2.010
15,mlp.down_proj,0.0000027747,0.05000,3.079
16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.100
16,self_attn.q_proj,0.0000072823,0.05000,1.562
16,self_attn.v_proj,0.0000004060,0.05000,1.615
16,self_attn.k_proj,0.0000007123,0.05000,1.691
16,self_attn.o_gate,0.0000042507,0.05000,0.451
16,mlp.up_proj,0.0000126712,0.05000,1.675
16,mlp.gate_proj,0.0000129596,0.05000,1.690
16,mlp.down_proj,0.0000040638,0.05000,2.776
17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.101
17,self_attn.q_proj,0.0000075214,0.05000,1.732
17,self_attn.v_proj,0.0000005942,0.05000,1.763
17,self_attn.k_proj,0.0000006580,0.05000,1.767
17,self_attn.o_gate,0.0000050420,0.05000,0.386
17,mlp.gate_proj,0.0000151962,0.05000,1.814
17,mlp.up_proj,0.0000138615,0.05000,1.927
17,mlp.down_proj,0.0000039890,0.05000,2.982
18,self_attn.k_proj,0.0000091951,0.05000,2.866
18,self_attn.v_proj,0.0000065547,0.05000,2.887
18,self_attn.o_proj,0.0000012725,0.05000,2.897
18,self_attn.q_proj,0.0000064936,0.05000,2.902
18,mlp.up_proj,0.0000126479,0.05000,2.038
18,mlp.gate_proj,0.0000131430,0.05000,2.050
18,mlp.down_proj,0.0000034282,0.05000,3.146
19,self_attn.v_proj,0.0000085492,0.05000,4.782
19,self_attn.q_proj,0.0000081404,0.05000,4.843
19,self_attn.k_proj,0.0000106452,0.05000,4.891
19,self_attn.o_proj,0.0000012228,0.05000,4.896
19,mlp.gate_proj,0.0000127026,0.05000,2.054
19,mlp.up_proj,0.0000129798,0.05000,2.060
19,mlp.down_proj,0.0000038460,0.05000,3.153
20,self_attn.o_proj,0.0000019678,0.05000,2.087
20,self_attn.v_proj,0.0000063993,0.05000,2.091
20,self_attn.k_proj,0.0000086538,0.05000,2.108
20,self_attn.q_proj,0.0000064077,0.05000,2.143
20,mlp.gate_proj,0.0000126178,0.05000,2.050
20,mlp.up_proj,0.0000133294,0.05000,2.058
20,mlp.down_proj,0.0000050352,0.05000,3.156
21,self_attn.q_proj,0.0000066632,0.05000,2.744
21,self_attn.o_proj,0.0000029935,0.05000,2.773
21,self_attn.v_proj,0.0000071241,0.05000,2.776
21,self_attn.k_proj,0.0000075829,0.05000,2.777
21,mlp.gate_proj,0.0000117755,0.05000,2.060
21,mlp.up_proj,0.0000126541,0.05000,2.066
21,mlp.down_proj,0.0000059720,0.05000,3.155
22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.110
22,self_attn.q_proj,0.0000090866,0.05000,1.417
22,self_attn.v_proj,0.0000010102,0.05000,1.445
22,self_attn.k_proj,0.0000005854,0.05000,1.667
22,self_attn.o_gate,0.0000048253,0.05000,0.779
22,mlp.up_proj,0.0000161680,0.05000,2.067
22,mlp.gate_proj,0.0000150658,0.05000,2.070
22,mlp.down_proj,0.0000087456,0.05000,3.160
23,self_attn.v_proj,0.0000085384,0.05000,2.781
23,self_attn.k_proj,0.0000113444,0.05000,2.789
23,self_attn.q_proj,0.0000088479,0.05000,2.795
23,self_attn.o_proj,0.0000027456,0.05000,2.799
23,mlp.gate_proj,0.0000176874,0.05000,2.017
23,mlp.up_proj,0.0000190031,0.05000,2.016
23,mlp.down_proj,0.0000105173,0.05000,3.106
24,self_attn.o_proj,0.0000046072,0.05000,2.394
24,self_attn.v_proj,0.0000076524,0.05000,2.431
24,self_attn.q_proj,0.0000075387,0.05000,2.491
24,self_attn.k_proj,0.0000093884,0.05000,2.491
24,mlp.gate_proj,0.0000207282,0.05000,1.679
24,mlp.up_proj,0.0000224395,0.05000,1.695
24,mlp.down_proj,0.0000171202,0.05000,2.841
25,self_attn.q_proj,0.0000078854,0.05000,2.658
25,self_attn.o_proj,0.0000059637,0.05000,2.663
25,self_attn.v_proj,0.0000077710,0.05000,2.721
25,self_attn.k_proj,0.0000100391,0.05000,2.728
25,mlp.gate_proj,0.0000239999,0.05000,2.067
25,mlp.up_proj,0.0000264045,0.05000,2.088
25,mlp.down_proj,0.0000301652,0.05000,3.176
26,self_attn.q_proj,0.0000126346,0.05000,2.771
26,self_attn.v_proj,0.0000123492,0.05000,2.776
26,self_attn.o_proj,0.0000112689,0.05000,2.795
26,self_attn.k_proj,0.0000180101,0.05000,2.801
26,mlp.gate_proj,0.0000267217,0.05000,2.057
26,mlp.up_proj,0.0000302445,0.05000,2.072
26,mlp.down_proj,0.0000262709,0.05000,3.169
27,self_attn.k_proj,0.0000130042,0.05000,4.944
27,self_attn.v_proj,0.0000110681,0.05000,4.947
27,self_attn.q_proj,0.0000118468,0.05000,4.956
27,self_attn.o_proj,0.0000070263,0.05000,4.957
27,mlp.up_proj,0.0000350091,0.05000,2.034
27,mlp.gate_proj,0.0000303793,0.05000,2.047
27,mlp.down_proj,0.0000345169,0.05000,3.155
28,self_attn.v_proj,0.0000156234,0.05000,2.886
28,self_attn.o_proj,0.0000215157,0.05000,2.916
28,self_attn.q_proj,0.0000151926,0.05000,2.922
28,self_attn.k_proj,0.0000210312,0.05000,2.927
28,mlp.up_proj,0.0000407514,0.05000,1.989
28,mlp.gate_proj,0.0000345202,0.05000,2.030
28,mlp.down_proj,0.0000481835,0.05000,3.106
29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.085
29,self_attn.q_proj,0.0000194512,0.05000,1.404
29,self_attn.v_proj,0.0000098854,0.05000,1.465
29,self_attn.k_proj,0.0000010127,0.05000,1.586
29,self_attn.o_gate,0.0000215202,0.05000,0.486
29,mlp.gate_proj,0.0000427544,0.05000,1.730
29,mlp.up_proj,0.0000519517,0.05000,1.899
29,mlp.down_proj,0.0000784172,0.05000,2.944
30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.114
30,self_attn.q_proj,0.0000298560,0.05000,1.735
30,self_attn.v_proj,0.0000307847,0.05000,1.778
30,self_attn.k_proj,0.0000011827,0.05000,1.793
30,self_attn.o_gate,0.0000342893,0.05000,0.404
30,mlp.gate_proj,0.0000556245,0.05000,2.027
30,mlp.up_proj,0.0000671554,0.05000,2.046
30,mlp.down_proj,0.0001689895,0.05000,3.163
31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.110
31,self_attn.q_proj,0.0000205567,0.05000,1.736
31,self_attn.k_proj,0.0000007648,0.05000,1.774
31,self_attn.v_proj,0.0000048051,0.05000,1.782
31,self_attn.o_gate,0.0000215028,0.05000,0.393
31,mlp.gate_proj,0.0000875096,0.05000,3.228
31,mlp.up_proj,0.0000976488,0.05000,3.286
31,mlp.down_proj,0.0006907178,0.05000,5.370