File size: 9,067 Bytes
09207b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0000438894,0.05000,2.464
0,self_attn.v_proj,0.0000015206,0.05000,2.489
0,self_attn.k_proj,0.0000091191,0.05000,2.527
0,self_attn.o_proj,0.0000068863,0.05000,0.799
0,mlp.up_proj,0.0000903576,0.05000,1.729
0,mlp.gate_proj,0.0001630741,0.05000,1.737
0,mlp.down_proj,0.0000053972,0.05000,5.176
1,self_attn.k_proj,0.0000065015,0.05000,2.082
1,self_attn.v_proj,0.0000023747,0.05000,2.092
1,self_attn.q_proj,0.0000240830,0.05000,2.142
1,self_attn.o_proj,0.0000021045,0.05000,0.795
1,mlp.gate_proj,0.0076908120,0.05000,1.750
1,mlp.up_proj,0.0053946630,0.05000,1.757
1,mlp.down_proj,0.0000063501,0.05000,5.185
2,self_attn.k_proj,0.0000150275,0.05000,3.674
2,self_attn.q_proj,0.0000574105,0.05000,3.691
2,self_attn.v_proj,0.0000031481,0.05000,3.720
2,self_attn.o_proj,0.0000035134,0.05000,0.823
2,mlp.gate_proj,0.0053627073,0.05000,1.712
2,mlp.up_proj,0.0036250334,0.05000,1.718
2,mlp.down_proj,0.0000113211,0.05000,5.244
3,self_attn.q_proj,0.0000806432,0.05000,2.577
3,self_attn.v_proj,0.0000060402,0.05000,2.596
3,self_attn.k_proj,0.0000198588,0.05000,2.621
3,self_attn.o_proj,0.0000076269,0.05000,0.769
3,mlp.gate_proj,0.0071466091,0.05000,1.720
3,mlp.up_proj,0.0057860881,0.05000,1.733
3,mlp.down_proj,0.0019782408,0.05000,5.223
4,self_attn.q_proj,0.0001705793,0.05000,2.892
4,self_attn.v_proj,0.0000171012,0.05000,2.968
4,self_attn.k_proj,0.0000366052,0.05000,2.977
4,self_attn.o_proj,0.0000097317,0.05000,0.762
4,mlp.gate_proj,0.0056847281,0.05000,1.737
4,mlp.up_proj,0.0041875392,0.05000,1.744
4,mlp.down_proj,0.0004363772,0.05000,5.214
5,self_attn.q_proj,0.0001847717,0.05000,2.402
5,self_attn.k_proj,0.0000372404,0.05000,2.453
5,self_attn.v_proj,0.0000176765,0.05000,2.471
5,self_attn.o_proj,0.0000111212,0.05000,0.816
5,mlp.up_proj,0.0059180821,0.05000,1.509
5,mlp.gate_proj,0.0068266907,0.05000,1.512
5,mlp.down_proj,0.0000407658,0.05000,5.318
6,self_attn.v_proj,0.0000146883,0.05000,3.003
6,self_attn.q_proj,0.0001399245,0.05000,3.015
6,self_attn.k_proj,0.0000274019,0.05000,3.030
6,self_attn.o_proj,0.0000136528,0.05000,0.786
6,mlp.up_proj,0.0009805247,0.05000,1.696
6,mlp.gate_proj,0.0013682553,0.05000,1.707
6,mlp.down_proj,0.0000458023,0.05000,5.316
7,self_attn.q_proj,0.0001471542,0.05000,2.522
7,self_attn.v_proj,0.0000256503,0.05000,2.557
7,self_attn.k_proj,0.0000249125,0.05000,2.578
7,self_attn.o_proj,0.0000177934,0.05000,0.797
7,mlp.gate_proj,0.0006032905,0.05000,1.708
7,mlp.up_proj,0.0005537175,0.05000,1.713
7,mlp.down_proj,0.0000577018,0.05000,5.372
8,self_attn.q_proj,0.0002175183,0.05000,3.201
8,self_attn.v_proj,0.0000218963,0.05000,3.266
8,self_attn.k_proj,0.0000468722,0.05000,3.294
8,self_attn.o_proj,0.0000200981,0.05000,0.781
8,mlp.gate_proj,0.0005997233,0.05000,1.732
8,mlp.up_proj,0.0005824831,0.05000,1.740
8,mlp.down_proj,0.0000502734,0.05000,5.312
9,self_attn.q_proj,0.0001792254,0.05000,2.273
9,self_attn.v_proj,0.0000299312,0.05000,2.300
9,self_attn.k_proj,0.0000316829,0.05000,2.332
9,self_attn.o_proj,0.0000227054,0.05000,0.777
9,mlp.up_proj,0.0013988300,0.05000,1.714
9,mlp.gate_proj,0.0021636186,0.05000,1.725
9,mlp.down_proj,0.0000711714,0.05000,5.317
10,self_attn.q_proj,0.0001766122,0.05000,3.446
10,self_attn.v_proj,0.0000218141,0.05000,3.489
10,self_attn.k_proj,0.0000340336,0.05000,3.500
10,self_attn.o_proj,0.0000213610,0.05000,0.795
10,mlp.up_proj,0.0005967160,0.05000,1.723
10,mlp.gate_proj,0.0006525118,0.05000,1.734
10,mlp.down_proj,0.0000555367,0.05000,5.270
11,self_attn.q_proj,0.0002048051,0.05000,2.349
11,self_attn.v_proj,0.0000197669,0.05000,2.365
11,self_attn.k_proj,0.0000437646,0.05000,2.421
11,self_attn.o_proj,0.0000240660,0.05000,0.808
11,mlp.up_proj,0.0005818227,0.05000,1.748
11,mlp.gate_proj,0.0005919564,0.05000,1.761
11,mlp.down_proj,0.0000545234,0.05000,5.195
12,self_attn.q_proj,0.0002245534,0.05000,3.620
12,self_attn.v_proj,0.0000252695,0.05000,3.661
12,self_attn.k_proj,0.0000469002,0.05000,3.666
12,self_attn.o_proj,0.0000249968,0.05000,0.808
12,mlp.gate_proj,0.0005769528,0.05000,1.745
12,mlp.up_proj,0.0005966430,0.05000,1.748
12,mlp.down_proj,0.0000542671,0.05000,5.341
13,self_attn.q_proj,0.0002443681,0.05000,2.494
13,self_attn.k_proj,0.0000441101,0.05000,2.515
13,self_attn.v_proj,0.0000307820,0.05000,2.556
13,self_attn.o_proj,0.0000284445,0.05000,0.817
13,mlp.gate_proj,0.0005881149,0.05000,1.689
13,mlp.up_proj,0.0005680628,0.05000,1.689
13,mlp.down_proj,0.0000449818,0.05000,5.261
14,self_attn.q_proj,0.0003254954,0.05000,3.587
14,self_attn.k_proj,0.0000603198,0.05000,3.644
14,self_attn.v_proj,0.0000318992,0.05000,3.654
14,self_attn.o_proj,0.0000330047,0.05000,0.769
14,mlp.gate_proj,0.0006131215,0.05000,1.744
14,mlp.up_proj,0.0006312539,0.05000,1.755
14,mlp.down_proj,0.0000674289,0.05000,5.298
15,self_attn.q_proj,0.0002752336,0.05000,2.450
15,self_attn.v_proj,0.0000280227,0.05000,2.471
15,self_attn.k_proj,0.0000577813,0.05000,2.498
15,self_attn.o_proj,0.0000255403,0.05000,0.808
15,mlp.up_proj,0.0006327451,0.05000,1.727
15,mlp.gate_proj,0.0006008668,0.05000,1.732
15,mlp.down_proj,0.0000529739,0.05000,5.423
16,self_attn.q_proj,0.0002853569,0.05000,3.297
16,self_attn.k_proj,0.0000520708,0.05000,3.361
16,self_attn.v_proj,0.0000383636,0.05000,3.408
16,self_attn.o_proj,0.0000282680,0.05000,0.784
16,mlp.gate_proj,0.0005806266,0.05000,1.681
16,mlp.up_proj,0.0006096392,0.05000,1.687
16,mlp.down_proj,0.0000499779,0.05000,5.265
17,self_attn.q_proj,0.0003053582,0.05000,2.297
17,self_attn.k_proj,0.0000502396,0.05000,2.343
17,self_attn.v_proj,0.0000401401,0.05000,2.362
17,self_attn.o_proj,0.0000236887,0.05000,0.787
17,mlp.up_proj,0.0007070474,0.05000,1.699
17,mlp.gate_proj,0.0006586523,0.05000,1.705
17,mlp.down_proj,0.0000636597,0.05000,5.242
18,self_attn.v_proj,0.0000431088,0.05000,3.427
18,self_attn.k_proj,0.0000382208,0.05000,3.450
18,self_attn.q_proj,0.0002408514,0.05000,3.464
18,self_attn.o_proj,0.0000456689,0.05000,0.767
18,mlp.gate_proj,0.0007737921,0.05000,1.665
18,mlp.up_proj,0.0008335007,0.05000,1.665
18,mlp.down_proj,0.0001066881,0.05000,5.210
19,self_attn.k_proj,0.0000421407,0.05000,2.532
19,self_attn.q_proj,0.0003071287,0.05000,2.551
19,self_attn.v_proj,0.0000575771,0.05000,2.569
19,self_attn.o_proj,0.0000559175,0.05000,0.783
19,mlp.up_proj,0.0009535861,0.05000,1.703
19,mlp.gate_proj,0.0009294953,0.05000,1.713
19,mlp.down_proj,0.0001444341,0.05000,5.298
20,self_attn.q_proj,0.0002946954,0.05000,3.331
20,self_attn.v_proj,0.0000622942,0.05000,3.366
20,self_attn.k_proj,0.0000443240,0.05000,3.408
20,self_attn.o_proj,0.0000462700,0.05000,0.771
20,mlp.up_proj,0.0012382881,0.05000,1.730
20,mlp.gate_proj,0.0012131543,0.05000,1.736
20,mlp.down_proj,0.0002655645,0.05000,5.548
21,self_attn.v_proj,0.0000965719,0.05000,2.527
21,self_attn.k_proj,0.0000449129,0.05000,2.534
21,self_attn.q_proj,0.0003481993,0.05000,2.555
21,self_attn.o_proj,0.0001077656,0.05000,0.775
21,mlp.up_proj,0.0015859862,0.05000,1.747
21,mlp.gate_proj,0.0016497311,0.05000,1.762
21,mlp.down_proj,0.0003857095,0.05000,5.253
22,self_attn.q_proj,0.0004678984,0.05000,3.419
22,self_attn.v_proj,0.0001514067,0.05000,3.532
22,self_attn.k_proj,0.0000566288,0.05000,3.545
22,self_attn.o_proj,0.0001139801,0.05000,0.786
22,mlp.up_proj,0.0021776777,0.05000,1.717
22,mlp.gate_proj,0.0022519192,0.05000,1.724
22,mlp.down_proj,0.0005672044,0.05000,5.295
23,self_attn.q_proj,0.0005272307,0.05000,2.444
23,self_attn.v_proj,0.0001932597,0.05000,2.466
23,self_attn.k_proj,0.0000665373,0.05000,2.506
23,self_attn.o_proj,0.0002671898,0.05000,0.791
23,mlp.gate_proj,0.0030036205,0.05000,1.709
23,mlp.up_proj,0.0028992037,0.05000,1.717
23,mlp.down_proj,0.0008007232,0.05000,5.237
24,self_attn.k_proj,0.0000597107,0.05000,3.429
24,self_attn.v_proj,0.0001813582,0.05000,3.471
24,self_attn.q_proj,0.0004617543,0.05000,3.532
24,self_attn.o_proj,0.0002546603,0.05000,0.797
24,mlp.up_proj,0.0030854423,0.05000,1.712
24,mlp.gate_proj,0.0029339537,0.05000,1.724
24,mlp.down_proj,0.0009348846,0.05000,5.228
25,self_attn.q_proj,0.0005000142,0.05000,2.455
25,self_attn.v_proj,0.0002651610,0.05000,2.482
25,self_attn.k_proj,0.0000586177,0.05000,2.507
25,self_attn.o_proj,0.0002996661,0.05000,0.772
25,mlp.gate_proj,0.0035098709,0.05000,1.672
25,mlp.up_proj,0.0038939198,0.05000,1.684
25,mlp.down_proj,0.0017060469,0.05000,5.197
26,self_attn.q_proj,0.0007208990,0.05000,2.176
26,self_attn.v_proj,0.0005336694,0.05000,2.225
26,self_attn.k_proj,0.0000866347,0.05000,2.270
26,self_attn.o_proj,0.0011180059,0.05000,0.846
26,mlp.gate_proj,0.0040886094,0.05000,1.700
26,mlp.up_proj,0.0046326114,0.05000,1.707
26,mlp.down_proj,0.0406908153,0.07000,7.459
27,self_attn.q_proj,0.0012152635,0.05000,2.389
27,self_attn.v_proj,0.0006911443,0.05000,2.449
27,self_attn.k_proj,0.0001213231,0.05000,2.467
27,self_attn.o_proj,0.0019590675,0.05000,0.785
27,mlp.up_proj,0.0097063605,0.05000,1.732
27,mlp.gate_proj,0.0092902648,0.05000,1.737
27,mlp.down_proj,0.0259093462,0.05000,5.223