File size: 9,067 Bytes
54d983f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
layer,module,loss,damp,time
0,self_attn.v_proj,0.0000003655,0.05000,6.135
0,self_attn.q_proj,0.0000139163,0.05000,6.251
0,self_attn.k_proj,0.0000070522,0.05000,6.287
0,self_attn.o_proj,0.0000000161,0.05000,1.438
0,mlp.up_proj,0.0000059081,0.05000,3.042
0,mlp.gate_proj,0.0000067841,0.05000,3.079
0,mlp.down_proj,0.0000000707,0.05000,3.574
1,self_attn.q_proj,0.0000158107,0.05000,6.250
1,self_attn.k_proj,0.0000093271,0.05000,6.342
1,self_attn.v_proj,0.0000009569,0.05000,6.403
1,self_attn.o_proj,0.0000000717,0.05000,1.268
1,mlp.gate_proj,0.0000085050,0.05000,2.909
1,mlp.up_proj,0.0000074407,0.05000,2.938
1,mlp.down_proj,0.0000020780,0.05000,3.624
2,self_attn.v_proj,0.0000035664,0.05000,6.128
2,self_attn.k_proj,0.0000384627,0.05000,6.183
2,self_attn.q_proj,0.0000636528,0.05000,6.217
2,self_attn.o_proj,0.0000000483,0.05000,1.279
2,mlp.gate_proj,0.0000133714,0.05000,2.891
2,mlp.up_proj,0.0000112456,0.05000,2.918
2,mlp.down_proj,0.0000001836,0.05000,3.592
3,self_attn.k_proj,0.0000209325,0.05000,6.104
3,self_attn.q_proj,0.0000374590,0.05000,6.210
3,self_attn.v_proj,0.0000032397,0.05000,6.243
3,self_attn.o_proj,0.0000000519,0.05000,1.242
3,mlp.gate_proj,0.0000199032,0.05000,2.985
3,mlp.up_proj,0.0000147765,0.05000,3.037
3,mlp.down_proj,0.0000002623,0.05000,3.622
4,self_attn.k_proj,0.0000209495,0.05000,6.247
4,self_attn.v_proj,0.0000035178,0.05000,6.307
4,self_attn.q_proj,0.0000402571,0.05000,6.356
4,self_attn.o_proj,0.0000001683,0.05000,1.296
4,mlp.gate_proj,0.0000267916,0.05000,3.001
4,mlp.up_proj,0.0000176318,0.05000,3.032
4,mlp.down_proj,0.0000004458,0.05000,3.538
5,self_attn.q_proj,0.0000566906,0.05000,6.143
5,self_attn.k_proj,0.0000350725,0.05000,6.192
5,self_attn.v_proj,0.0000035867,0.05000,6.259
5,self_attn.o_proj,0.0000002498,0.05000,1.254
5,mlp.up_proj,0.0000200849,0.05000,2.861
5,mlp.gate_proj,0.0000287105,0.05000,2.907
5,mlp.down_proj,0.0000006270,0.05000,3.494
6,self_attn.q_proj,0.0000456653,0.05000,6.164
6,self_attn.k_proj,0.0000243565,0.05000,6.213
6,self_attn.v_proj,0.0000032720,0.05000,6.258
6,self_attn.o_proj,0.0000002966,0.05000,1.271
6,mlp.gate_proj,0.0000316156,0.05000,2.851
6,mlp.up_proj,0.0000221315,0.05000,2.875
6,mlp.down_proj,0.0000007019,0.05000,3.596
7,self_attn.k_proj,0.0000232553,0.05000,6.241
7,self_attn.q_proj,0.0000390834,0.05000,6.349
7,self_attn.v_proj,0.0000030140,0.05000,6.364
7,self_attn.o_proj,0.0000005020,0.05000,1.230
7,mlp.gate_proj,0.0000300552,0.05000,2.820
7,mlp.up_proj,0.0000227827,0.05000,2.857
7,mlp.down_proj,0.0000007818,0.05000,3.548
8,self_attn.k_proj,0.0000296915,0.05000,6.160
8,self_attn.q_proj,0.0000489327,0.05000,6.220
8,self_attn.v_proj,0.0000035765,0.05000,6.250
8,self_attn.o_proj,0.0000005180,0.05000,1.287
8,mlp.gate_proj,0.0000325682,0.05000,3.116
8,mlp.up_proj,0.0000241525,0.05000,3.147
8,mlp.down_proj,0.0000007777,0.05000,3.697
9,self_attn.k_proj,0.0000279815,0.05000,6.365
9,self_attn.q_proj,0.0000470294,0.05000,6.415
9,self_attn.v_proj,0.0000045795,0.05000,6.444
9,self_attn.o_proj,0.0000005254,0.05000,1.274
9,mlp.gate_proj,0.0000318904,0.05000,3.054
9,mlp.up_proj,0.0000243400,0.05000,3.097
9,mlp.down_proj,0.0000007917,0.05000,3.662
10,self_attn.v_proj,0.0000035862,0.05000,6.390
10,self_attn.k_proj,0.0000299603,0.05000,6.439
10,self_attn.q_proj,0.0000476768,0.05000,6.472
10,self_attn.o_proj,0.0000006423,0.05000,1.260
10,mlp.gate_proj,0.0000323488,0.05000,3.004
10,mlp.up_proj,0.0000264711,0.05000,3.029
10,mlp.down_proj,0.0000009133,0.05000,3.569
11,self_attn.v_proj,0.0000042222,0.05000,6.385
11,self_attn.q_proj,0.0000389616,0.05000,6.432
11,self_attn.k_proj,0.0000221590,0.05000,6.478
11,self_attn.o_proj,0.0000007108,0.05000,1.303
11,mlp.gate_proj,0.0000346920,0.05000,3.147
11,mlp.up_proj,0.0000293260,0.05000,3.189
11,mlp.down_proj,0.0000010680,0.05000,3.578
12,self_attn.v_proj,0.0000049074,0.05000,6.397
12,self_attn.q_proj,0.0000599314,0.05000,6.425
12,self_attn.k_proj,0.0000360839,0.05000,6.448
12,self_attn.o_proj,0.0000008251,0.05000,1.306
12,mlp.up_proj,0.0000314120,0.05000,3.121
12,mlp.gate_proj,0.0000372577,0.05000,3.155
12,mlp.down_proj,0.0000012126,0.05000,3.640
13,self_attn.k_proj,0.0000389678,0.05000,6.532
13,self_attn.q_proj,0.0000606843,0.05000,6.581
13,self_attn.v_proj,0.0000056146,0.05000,6.603
13,self_attn.o_proj,0.0000009978,0.05000,1.232
13,mlp.gate_proj,0.0000445419,0.05000,3.054
13,mlp.up_proj,0.0000351055,0.05000,3.085
13,mlp.down_proj,0.0000015760,0.05000,3.608
14,self_attn.q_proj,0.0000686343,0.05000,6.346
14,self_attn.k_proj,0.0000314004,0.05000,6.429
14,self_attn.v_proj,0.0000066387,0.05000,6.455
14,self_attn.o_proj,0.0000012561,0.05000,1.325
14,mlp.up_proj,0.0000375102,0.05000,2.993
14,mlp.gate_proj,0.0000478461,0.05000,3.019
14,mlp.down_proj,0.0000019180,0.05000,3.639
15,self_attn.v_proj,0.0000065177,0.05000,6.382
15,self_attn.q_proj,0.0000684232,0.05000,6.478
15,self_attn.k_proj,0.0000357806,0.05000,6.490
15,self_attn.o_proj,0.0000009062,0.05000,1.253
15,mlp.up_proj,0.0000374816,0.05000,3.019
15,mlp.gate_proj,0.0000510690,0.05000,3.046
15,mlp.down_proj,0.0000019322,0.05000,3.550
16,self_attn.q_proj,0.0000697884,0.05000,6.246
16,self_attn.v_proj,0.0000073443,0.05000,6.299
16,self_attn.k_proj,0.0000398958,0.05000,6.347
16,self_attn.o_proj,0.0000005650,0.05000,1.274
16,mlp.up_proj,0.0000372290,0.05000,3.057
16,mlp.gate_proj,0.0000518827,0.05000,3.110
16,mlp.down_proj,0.0000018123,0.05000,3.574
17,self_attn.v_proj,0.0000071509,0.05000,6.345
17,self_attn.k_proj,0.0000360828,0.05000,6.390
17,self_attn.q_proj,0.0000662234,0.05000,6.422
17,self_attn.o_proj,0.0000005764,0.05000,1.277
17,mlp.gate_proj,0.0000540872,0.05000,3.071
17,mlp.up_proj,0.0000382789,0.05000,3.121
17,mlp.down_proj,0.0000019723,0.05000,3.628
18,self_attn.q_proj,0.0000768960,0.05000,6.232
18,self_attn.v_proj,0.0000090087,0.05000,6.312
18,self_attn.k_proj,0.0000426919,0.05000,6.348
18,self_attn.o_proj,0.0000005129,0.05000,1.306
18,mlp.up_proj,0.0000403784,0.05000,3.084
18,mlp.gate_proj,0.0000557208,0.05000,3.102
18,mlp.down_proj,0.0000020153,0.05000,3.576
19,self_attn.k_proj,0.0000391191,0.05000,6.414
19,self_attn.q_proj,0.0000672697,0.05000,6.462
19,self_attn.v_proj,0.0000086852,0.05000,6.490
19,self_attn.o_proj,0.0000007129,0.05000,1.348
19,mlp.up_proj,0.0000428687,0.05000,3.091
19,mlp.gate_proj,0.0000584434,0.05000,3.129
19,mlp.down_proj,0.0000024692,0.05000,3.558
20,self_attn.v_proj,0.0000102000,0.05000,6.572
20,self_attn.k_proj,0.0000419316,0.05000,6.646
20,self_attn.q_proj,0.0000693657,0.05000,6.679
20,self_attn.o_proj,0.0000005730,0.05000,1.440
20,mlp.up_proj,0.0000435856,0.05000,3.067
20,mlp.gate_proj,0.0000564082,0.05000,3.126
20,mlp.down_proj,0.0000023884,0.05000,3.670
21,self_attn.k_proj,0.0000410597,0.05000,6.395
21,self_attn.v_proj,0.0000134331,0.05000,6.524
21,self_attn.q_proj,0.0000694150,0.05000,6.547
21,self_attn.o_proj,0.0000006532,0.05000,1.324
21,mlp.gate_proj,0.0000595703,0.05000,3.093
21,mlp.up_proj,0.0000455969,0.05000,3.126
21,mlp.down_proj,0.0000025119,0.05000,3.593
22,self_attn.k_proj,0.0000379314,0.05000,6.376
22,self_attn.q_proj,0.0000656641,0.05000,6.415
22,self_attn.v_proj,0.0000129559,0.05000,6.437
22,self_attn.o_proj,0.0000006026,0.05000,1.297
22,mlp.up_proj,0.0000487634,0.05000,3.087
22,mlp.gate_proj,0.0000640544,0.05000,3.115
22,mlp.down_proj,0.0000028967,0.05000,3.652
23,self_attn.q_proj,0.0000680379,0.05000,6.571
23,self_attn.v_proj,0.0000125475,0.05000,6.652
23,self_attn.k_proj,0.0000421066,0.05000,6.674
23,self_attn.o_proj,0.0000008360,0.05000,1.300
23,mlp.gate_proj,0.0000771958,0.05000,3.051
23,mlp.up_proj,0.0000553964,0.05000,3.084
23,mlp.down_proj,0.0000037663,0.05000,3.648
24,self_attn.q_proj,0.0000738199,0.05000,6.379
24,self_attn.v_proj,0.0000189734,0.05000,6.423
24,self_attn.k_proj,0.0000465050,0.05000,6.477
24,self_attn.o_proj,0.0000014266,0.05000,1.281
24,mlp.up_proj,0.0000596369,0.05000,3.043
24,mlp.gate_proj,0.0000848039,0.05000,3.081
24,mlp.down_proj,0.0000043019,0.05000,3.599
25,self_attn.k_proj,0.0000343078,0.05000,6.722
25,self_attn.q_proj,0.0000664509,0.05000,6.772
25,self_attn.v_proj,0.0000153221,0.05000,6.806
25,self_attn.o_proj,0.0000016806,0.05000,1.262
25,mlp.gate_proj,0.0000898007,0.05000,3.004
25,mlp.up_proj,0.0000626860,0.05000,3.051
25,mlp.down_proj,0.0000053409,0.05000,3.695
26,self_attn.v_proj,0.0000210691,0.05000,6.411
26,self_attn.q_proj,0.0000636958,0.05000,6.472
26,self_attn.k_proj,0.0000403479,0.05000,6.524
26,self_attn.o_proj,0.0000014542,0.05000,1.290
26,mlp.up_proj,0.0000650641,0.05000,3.043
26,mlp.gate_proj,0.0000945761,0.05000,3.087
26,mlp.down_proj,0.0000081644,0.05000,3.669
27,self_attn.v_proj,0.0000141159,0.05000,6.830
27,self_attn.k_proj,0.0000273797,0.05000,6.913
27,self_attn.q_proj,0.0000486121,0.05000,6.940
27,self_attn.o_proj,0.0000070236,0.05000,1.345
27,mlp.gate_proj,0.0000950958,0.05000,3.230
27,mlp.up_proj,0.0000753671,0.05000,3.255
27,mlp.down_proj,0.0000416984,0.05000,3.798