File size: 10,367 Bytes
3c2ef8e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
layer,module,loss,samples,damp,time
0,self_attn.v_proj,0.0000000004,0.05000,4.879
0,self_attn.q_proj,0.0000000150,0.05000,4.920
0,self_attn.k_proj,0.0000000045,0.05000,4.920
0,self_attn.o_proj,0.0000000000,0.05000,1.541
0,mlp.up_proj,0.0000000142,0.05000,2.767
0,mlp.gate_proj,0.0000000164,0.05000,2.788
0,mlp.down_proj,0.0000000000,0.05000,5.935
1,self_attn.q_proj,0.0000000452,0.05000,5.686
1,self_attn.k_proj,0.0000000208,0.05000,5.717
1,self_attn.v_proj,0.0000000034,0.05000,5.753
1,self_attn.o_proj,0.0000000000,0.05000,1.558
1,mlp.gate_proj,0.0000000548,0.05000,2.780
1,mlp.up_proj,0.0000000480,0.05000,2.812
1,mlp.down_proj,0.0000000073,0.05000,5.981
2,self_attn.q_proj,0.0000003553,0.05000,5.404
2,self_attn.v_proj,0.0000000225,0.05000,5.422
2,self_attn.k_proj,0.0000001725,0.05000,5.441
2,self_attn.o_proj,0.0000000000,0.05000,1.573
2,mlp.gate_proj,0.0000001153,0.05000,2.787
2,mlp.up_proj,0.0000001006,0.05000,2.788
2,mlp.down_proj,0.0000000000,0.05000,5.971
3,self_attn.k_proj,0.0000000937,0.05000,5.724
3,self_attn.v_proj,0.0000000155,0.05000,5.736
3,self_attn.q_proj,0.0000001928,0.05000,5.763
3,self_attn.o_proj,0.0000000000,0.05000,1.568
3,mlp.up_proj,0.0000001660,0.05000,2.751
3,mlp.gate_proj,0.0000001922,0.05000,2.756
3,mlp.down_proj,0.0000000001,0.05000,6.016
4,self_attn.q_proj,0.0000003002,0.05000,5.606
4,self_attn.k_proj,0.0000001324,0.05000,5.636
4,self_attn.v_proj,0.0000000261,0.05000,5.673
4,self_attn.o_proj,0.0000000000,0.05000,1.554
4,mlp.gate_proj,0.0000002774,0.05000,2.768
4,mlp.up_proj,0.0000002246,0.05000,2.775
4,mlp.down_proj,0.0000000001,0.05000,6.006
5,self_attn.q_proj,0.0000003899,0.05000,5.595
5,self_attn.v_proj,0.0000000291,0.05000,5.614
5,self_attn.k_proj,0.0000001730,0.05000,5.617
5,self_attn.o_proj,0.0000000001,0.05000,1.574
5,mlp.gate_proj,0.0000003836,0.05000,2.772
5,mlp.up_proj,0.0000002912,0.05000,2.784
5,mlp.down_proj,0.0000000003,0.05000,6.005
6,self_attn.k_proj,0.0000001706,0.05000,5.528
6,self_attn.v_proj,0.0000000294,0.05000,5.586
6,self_attn.q_proj,0.0000003636,0.05000,5.630
6,self_attn.o_proj,0.0000000001,0.05000,1.573
6,mlp.gate_proj,0.0000004451,0.05000,2.861
6,mlp.up_proj,0.0000003460,0.05000,2.881
6,mlp.down_proj,0.0000000004,0.05000,6.009
7,self_attn.v_proj,0.0000000391,0.05000,5.708
7,self_attn.k_proj,0.0000002152,0.05000,5.739
7,self_attn.q_proj,0.0000004503,0.05000,5.761
7,self_attn.o_proj,0.0000000001,0.05000,1.558
7,mlp.up_proj,0.0000004005,0.05000,2.803
7,mlp.gate_proj,0.0000005274,0.05000,2.822
7,mlp.down_proj,0.0000000005,0.05000,6.020
8,self_attn.v_proj,0.0000000364,0.05000,5.675
8,self_attn.k_proj,0.0000001697,0.05000,5.686
8,self_attn.q_proj,0.0000003730,0.05000,5.700
8,self_attn.o_proj,0.0000000002,0.05000,1.551
8,mlp.gate_proj,0.0000005689,0.05000,2.796
8,mlp.up_proj,0.0000004404,0.05000,2.803
8,mlp.down_proj,0.0000000005,0.05000,5.992
9,self_attn.v_proj,0.0000000435,0.05000,5.665
9,self_attn.q_proj,0.0000005147,0.05000,5.700
9,self_attn.k_proj,0.0000002410,0.05000,5.705
9,self_attn.o_proj,0.0000000002,0.05000,1.557
9,mlp.gate_proj,0.0000005988,0.05000,2.784
9,mlp.up_proj,0.0000004793,0.05000,2.792
9,mlp.down_proj,0.0000000007,0.05000,5.989
10,self_attn.q_proj,0.0000004548,0.05000,5.268
10,self_attn.v_proj,0.0000000379,0.05000,5.291
10,self_attn.k_proj,0.0000002164,0.05000,5.309
10,self_attn.o_proj,0.0000000003,0.05000,1.565
10,mlp.up_proj,0.0000005106,0.05000,2.780
10,mlp.gate_proj,0.0000006227,0.05000,2.789
10,mlp.down_proj,0.0000000008,0.05000,5.958
11,self_attn.v_proj,0.0000000559,0.05000,5.584
11,self_attn.k_proj,0.0000002533,0.05000,5.600
11,self_attn.q_proj,0.0000005502,0.05000,5.626
11,self_attn.o_proj,0.0000000004,0.05000,1.522
11,mlp.up_proj,0.0000005648,0.05000,2.815
11,mlp.gate_proj,0.0000006790,0.05000,2.825
11,mlp.down_proj,0.0000000009,0.05000,6.012
12,self_attn.q_proj,0.0000007278,0.05000,5.495
12,self_attn.v_proj,0.0000000635,0.05000,5.547
12,self_attn.k_proj,0.0000003287,0.05000,5.564
12,self_attn.o_proj,0.0000000004,0.05000,1.548
12,mlp.up_proj,0.0000006364,0.05000,2.785
12,mlp.gate_proj,0.0000007479,0.05000,2.796
12,mlp.down_proj,0.0000000011,0.05000,5.937
13,self_attn.q_proj,0.0000005740,0.05000,5.754
13,self_attn.k_proj,0.0000002807,0.05000,5.812
13,self_attn.v_proj,0.0000000607,0.05000,5.828
13,self_attn.o_proj,0.0000000006,0.05000,1.556
13,mlp.gate_proj,0.0000008370,0.05000,2.780
13,mlp.up_proj,0.0000007320,0.05000,2.790
13,mlp.down_proj,0.0000000013,0.05000,5.960
14,self_attn.v_proj,0.0000000978,0.05000,5.400
14,self_attn.k_proj,0.0000002809,0.05000,5.464
14,self_attn.q_proj,0.0000006548,0.05000,5.490
14,self_attn.o_proj,0.0000000007,0.05000,1.586
14,mlp.gate_proj,0.0000009377,0.05000,2.823
14,mlp.up_proj,0.0000008157,0.05000,2.830
14,mlp.down_proj,0.0000000017,0.05000,5.987
15,self_attn.q_proj,0.0000008244,0.05000,5.573
15,self_attn.v_proj,0.0000001112,0.05000,5.588
15,self_attn.k_proj,0.0000003675,0.05000,5.609
15,self_attn.o_proj,0.0000000008,0.05000,1.566
15,mlp.gate_proj,0.0000010717,0.05000,2.808
15,mlp.up_proj,0.0000009010,0.05000,2.818
15,mlp.down_proj,0.0000000022,0.05000,5.975
16,self_attn.k_proj,0.0000003414,0.05000,5.454
16,self_attn.q_proj,0.0000007387,0.05000,5.480
16,self_attn.v_proj,0.0000001060,0.05000,5.509
16,self_attn.o_proj,0.0000000009,0.05000,1.547
16,mlp.gate_proj,0.0000013285,0.05000,2.811
16,mlp.up_proj,0.0000010571,0.05000,2.817
16,mlp.down_proj,0.0000000031,0.05000,5.919
17,self_attn.k_proj,0.0000002850,0.05000,5.686
17,self_attn.v_proj,0.0000001007,0.05000,5.715
17,self_attn.q_proj,0.0000006805,0.05000,5.725
17,self_attn.o_proj,0.0000000011,0.05000,1.541
17,mlp.up_proj,0.0000012283,0.05000,2.767
17,mlp.gate_proj,0.0000015413,0.05000,2.776
17,mlp.down_proj,0.0000000043,0.05000,5.974
18,self_attn.v_proj,0.0000001238,0.05000,5.556
18,self_attn.q_proj,0.0000008637,0.05000,5.607
18,self_attn.k_proj,0.0000003386,0.05000,5.626
18,self_attn.o_proj,0.0000000011,0.05000,1.556
18,mlp.gate_proj,0.0000017367,0.05000,2.802
18,mlp.up_proj,0.0000013842,0.05000,2.808
18,mlp.down_proj,0.0000000061,0.05000,5.951
19,self_attn.v_proj,0.0000001450,0.05000,5.642
19,self_attn.k_proj,0.0000003408,0.05000,5.663
19,self_attn.q_proj,0.0000007988,0.05000,5.674
19,self_attn.o_proj,0.0000000013,0.05000,1.604
19,mlp.gate_proj,0.0000019642,0.05000,2.772
19,mlp.up_proj,0.0000015377,0.05000,2.790
19,mlp.down_proj,0.0000000081,0.05000,5.997
20,self_attn.v_proj,0.0000460283,0.05000,5.376
20,self_attn.k_proj,0.0001023333,0.05000,5.450
20,self_attn.q_proj,0.0002501937,0.05000,5.460
20,self_attn.o_proj,0.0000003618,0.05000,1.531
20,mlp.up_proj,0.0004975359,0.05000,2.774
20,mlp.gate_proj,0.0006521194,0.05000,2.779
20,mlp.down_proj,0.0000025065,0.05000,5.931
21,self_attn.q_proj,0.0002477611,0.05000,5.504
21,self_attn.v_proj,0.0000486894,0.05000,5.527
21,self_attn.k_proj,0.0001006711,0.05000,5.532
21,self_attn.o_proj,0.0000003476,0.05000,1.553
21,mlp.gate_proj,0.0007708430,0.05000,2.834
21,mlp.up_proj,0.0005485023,0.05000,2.842
21,mlp.down_proj,0.0000025757,0.05000,5.948
22,self_attn.k_proj,0.0000960108,0.05000,5.470
22,self_attn.v_proj,0.0000507348,0.05000,5.527
22,self_attn.q_proj,0.0002405264,0.05000,5.538
22,self_attn.o_proj,0.0000002108,0.05000,1.560
22,mlp.gate_proj,0.0008269122,0.05000,2.758
22,mlp.up_proj,0.0005943207,0.05000,2.774
22,mlp.down_proj,0.0000027136,0.05000,5.973
23,self_attn.k_proj,0.0000954055,0.05000,5.533
23,self_attn.q_proj,0.0002420283,0.05000,5.626
23,self_attn.v_proj,0.0000532537,0.05000,5.632
23,self_attn.o_proj,0.0000003501,0.05000,1.542
23,mlp.gate_proj,0.0009137075,0.05000,2.776
23,mlp.up_proj,0.0006547531,0.05000,2.791
23,mlp.down_proj,0.0000030363,0.05000,5.945
24,self_attn.v_proj,0.0000622052,0.05000,5.253
24,self_attn.k_proj,0.0001111750,0.05000,5.364
24,self_attn.q_proj,0.0002778825,0.05000,5.381
24,self_attn.o_proj,0.0000003277,0.05000,1.549
24,mlp.gate_proj,0.0010315949,0.05000,2.772
24,mlp.up_proj,0.0007254657,0.05000,2.776
24,mlp.down_proj,0.0000033368,0.05000,5.985
25,self_attn.q_proj,0.0002790395,0.05000,5.293
25,self_attn.k_proj,0.0001072556,0.05000,5.306
25,self_attn.v_proj,0.0000711914,0.05000,5.348
25,self_attn.o_proj,0.0000003792,0.05000,1.548
25,mlp.gate_proj,0.0011210348,0.05000,2.794
25,mlp.up_proj,0.0007955477,0.05000,2.800
25,mlp.down_proj,0.0000038278,0.05000,5.962
26,self_attn.k_proj,0.0001005206,0.05000,5.340
26,self_attn.v_proj,0.0000751634,0.05000,5.363
26,self_attn.q_proj,0.0002639526,0.05000,5.399
26,self_attn.o_proj,0.0000004955,0.05000,1.543
26,mlp.gate_proj,0.0011751253,0.05000,2.736
26,mlp.up_proj,0.0008637970,0.05000,2.749
26,mlp.down_proj,0.0000043847,0.05000,5.972
27,self_attn.v_proj,0.0000680719,0.05000,5.472
27,self_attn.k_proj,0.0001012717,0.05000,5.603
27,self_attn.q_proj,0.0002717224,0.05000,5.640
27,self_attn.o_proj,0.0000005582,0.05000,1.565
27,mlp.up_proj,0.0009500155,0.05000,2.771
27,mlp.gate_proj,0.0012836707,0.05000,2.789
27,mlp.down_proj,0.0000051473,0.05000,5.936
28,self_attn.v_proj,0.0000992078,0.05000,5.279
28,self_attn.q_proj,0.0002645630,0.05000,5.304
28,self_attn.k_proj,0.0001015999,0.05000,5.323
28,self_attn.o_proj,0.0000008682,0.05000,1.565
28,mlp.up_proj,0.0010524418,0.05000,2.756
28,mlp.gate_proj,0.0013713280,0.05000,2.756
28,mlp.down_proj,0.0000069629,0.05000,5.935
29,self_attn.q_proj,0.0003024004,0.05000,5.304
29,self_attn.v_proj,0.0001436985,0.05000,5.320
29,self_attn.k_proj,0.0001024819,0.05000,5.347
29,self_attn.o_proj,0.0000018162,0.05000,1.534
29,mlp.gate_proj,0.0013321182,0.05000,2.757
29,mlp.up_proj,0.0010659065,0.05000,2.770
29,mlp.down_proj,0.0000087815,0.05000,5.935
30,self_attn.k_proj,0.0000003260,0.05000,5.367
30,self_attn.q_proj,0.0000009703,0.05000,5.382
30,self_attn.v_proj,0.0000005463,0.05000,5.396
30,self_attn.o_proj,0.0000000055,0.05000,1.542
30,mlp.up_proj,0.0000037689,0.05000,2.758
30,mlp.gate_proj,0.0000046290,0.05000,2.762
30,mlp.down_proj,0.0000000387,0.05000,5.944
31,self_attn.v_proj,0.0000005541,0.05000,5.782
31,self_attn.q_proj,0.0000009006,0.05000,5.797
31,self_attn.k_proj,0.0000003091,0.05000,5.814
31,self_attn.o_proj,0.0000000074,0.05000,1.541
31,mlp.gate_proj,0.0000037887,0.05000,2.776
31,mlp.up_proj,0.0000029935,0.05000,2.782
31,mlp.down_proj,0.0000000543,0.05000,5.973