File size: 9,067 Bytes
c1709c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
layer,module,loss,samples,damp,time
0,self_attn.k_proj,0.0000066317,0.05000,5.604
0,self_attn.v_proj,0.0000003162,0.05000,5.657
0,self_attn.q_proj,0.0000131101,0.05000,5.664
0,self_attn.o_proj,0.0000000105,0.05000,1.036
0,mlp.up_proj,0.0000043037,0.05000,2.764
0,mlp.gate_proj,0.0000049660,0.05000,2.821
0,mlp.down_proj,0.0000000337,0.05000,3.033
1,self_attn.q_proj,0.0000134517,0.05000,5.363
1,self_attn.k_proj,0.0000079466,0.05000,5.439
1,self_attn.v_proj,0.0000008457,0.05000,5.470
1,self_attn.o_proj,0.0000000309,0.05000,1.609
1,mlp.up_proj,0.0000054533,0.05000,2.981
1,mlp.gate_proj,0.0000062155,0.05000,3.048
1,mlp.down_proj,0.0000013564,0.05000,3.028
2,self_attn.v_proj,0.0000031549,0.05000,5.545
2,self_attn.k_proj,0.0000328911,0.05000,5.608
2,self_attn.q_proj,0.0000544763,0.05000,5.630
2,self_attn.o_proj,0.0000000260,0.05000,1.056
2,mlp.up_proj,0.0000075630,0.05000,2.302
2,mlp.gate_proj,0.0000090424,0.05000,2.328
2,mlp.down_proj,0.0000000819,0.05000,2.929
3,self_attn.v_proj,0.0000024446,0.05000,5.036
3,self_attn.q_proj,0.0000284173,0.05000,5.095
3,self_attn.k_proj,0.0000160375,0.05000,5.110
3,self_attn.o_proj,0.0000000395,0.05000,0.981
3,mlp.up_proj,0.0000099246,0.05000,2.359
3,mlp.gate_proj,0.0000134570,0.05000,2.377
3,mlp.down_proj,0.0000001285,0.05000,2.936
4,self_attn.q_proj,0.0000308824,0.05000,4.856
4,self_attn.k_proj,0.0000161503,0.05000,4.937
4,self_attn.v_proj,0.0000027480,0.05000,4.945
4,self_attn.o_proj,0.0000001472,0.05000,1.006
4,mlp.up_proj,0.0000110954,0.05000,2.308
4,mlp.gate_proj,0.0000169268,0.05000,2.318
4,mlp.down_proj,0.0000002153,0.05000,2.895
5,self_attn.k_proj,0.0000247132,0.05000,4.874
5,self_attn.v_proj,0.0000025206,0.05000,4.912
5,self_attn.q_proj,0.0000401912,0.05000,4.949
5,self_attn.o_proj,0.0000002016,0.05000,1.024
5,mlp.up_proj,0.0000116495,0.05000,2.398
5,mlp.gate_proj,0.0000167069,0.05000,2.427
5,mlp.down_proj,0.0000002504,0.05000,2.914
6,self_attn.q_proj,0.0000297058,0.05000,4.908
6,self_attn.v_proj,0.0000020855,0.05000,4.930
6,self_attn.k_proj,0.0000157571,0.05000,4.957
6,self_attn.o_proj,0.0000002040,0.05000,0.995
6,mlp.gate_proj,0.0000173706,0.05000,2.219
6,mlp.up_proj,0.0000121039,0.05000,2.235
6,mlp.down_proj,0.0000002704,0.05000,2.928
7,self_attn.v_proj,0.0000017357,0.05000,4.881
7,self_attn.q_proj,0.0000230821,0.05000,4.937
7,self_attn.k_proj,0.0000135448,0.05000,4.972
7,self_attn.o_proj,0.0000003555,0.05000,1.008
7,mlp.up_proj,0.0000111421,0.05000,2.348
7,mlp.gate_proj,0.0000147502,0.05000,2.371
7,mlp.down_proj,0.0000002646,0.05000,2.943
8,self_attn.v_proj,0.0000018656,0.05000,4.844
8,self_attn.k_proj,0.0000156979,0.05000,4.940
8,self_attn.q_proj,0.0000260248,0.05000,4.954
8,self_attn.o_proj,0.0000002554,0.05000,1.032
8,mlp.gate_proj,0.0000151389,0.05000,2.306
8,mlp.up_proj,0.0000111552,0.05000,2.328
8,mlp.down_proj,0.0000002605,0.05000,2.917
9,self_attn.k_proj,0.0000143108,0.05000,4.860
9,self_attn.q_proj,0.0000242022,0.05000,4.928
9,self_attn.v_proj,0.0000023695,0.05000,4.949
9,self_attn.o_proj,0.0000003200,0.05000,1.004
9,mlp.gate_proj,0.0000136877,0.05000,2.331
9,mlp.up_proj,0.0000103751,0.05000,2.352
9,mlp.down_proj,0.0000002325,0.05000,3.009
10,self_attn.k_proj,0.0000141120,0.05000,4.792
10,self_attn.v_proj,0.0000016791,0.05000,4.899
10,self_attn.q_proj,0.0000224765,0.05000,4.928
10,self_attn.o_proj,0.0000003049,0.05000,0.988
10,mlp.gate_proj,0.0000133799,0.05000,2.362
10,mlp.up_proj,0.0000108373,0.05000,2.385
10,mlp.down_proj,0.0000002523,0.05000,2.923
11,self_attn.v_proj,0.0000018817,0.05000,4.830
11,self_attn.k_proj,0.0000098131,0.05000,4.870
11,self_attn.q_proj,0.0000172427,0.05000,4.893
11,self_attn.o_proj,0.0000003372,0.05000,1.023
11,mlp.up_proj,0.0000116445,0.05000,2.369
11,mlp.gate_proj,0.0000139223,0.05000,2.385
11,mlp.down_proj,0.0000002836,0.05000,2.904
12,self_attn.k_proj,0.0000158544,0.05000,4.890
12,self_attn.v_proj,0.0000021823,0.05000,4.930
12,self_attn.q_proj,0.0000265778,0.05000,4.956
12,self_attn.o_proj,0.0000004284,0.05000,1.025
12,mlp.up_proj,0.0000118799,0.05000,2.475
12,mlp.gate_proj,0.0000142177,0.05000,2.500
12,mlp.down_proj,0.0000003101,0.05000,2.907
13,self_attn.q_proj,0.0000267522,0.05000,4.748
13,self_attn.k_proj,0.0000171389,0.05000,4.842
13,self_attn.v_proj,0.0000025023,0.05000,4.862
13,self_attn.o_proj,0.0000003842,0.05000,1.012
13,mlp.up_proj,0.0000133185,0.05000,2.264
13,mlp.gate_proj,0.0000171810,0.05000,2.266
13,mlp.down_proj,0.0000004323,0.05000,2.890
14,self_attn.v_proj,0.0000028622,0.05000,4.864
14,self_attn.k_proj,0.0000135327,0.05000,4.916
14,self_attn.q_proj,0.0000299978,0.05000,4.935
14,self_attn.o_proj,0.0000006734,0.05000,1.056
14,mlp.gate_proj,0.0000172195,0.05000,2.320
14,mlp.up_proj,0.0000132037,0.05000,2.334
14,mlp.down_proj,0.0000004415,0.05000,2.914
15,self_attn.v_proj,0.0000026275,0.05000,4.870
15,self_attn.q_proj,0.0000276026,0.05000,4.926
15,self_attn.k_proj,0.0000143523,0.05000,4.975
15,self_attn.o_proj,0.0000004534,0.05000,1.019
15,mlp.up_proj,0.0000123739,0.05000,2.333
15,mlp.gate_proj,0.0000171509,0.05000,2.347
15,mlp.down_proj,0.0000004026,0.05000,2.899
16,self_attn.v_proj,0.0000027276,0.05000,4.838
16,self_attn.k_proj,0.0000147427,0.05000,4.913
16,self_attn.q_proj,0.0000258635,0.05000,4.952
16,self_attn.o_proj,0.0000002295,0.05000,1.028
16,mlp.up_proj,0.0000119168,0.05000,2.310
16,mlp.gate_proj,0.0000168227,0.05000,2.335
16,mlp.down_proj,0.0000003651,0.05000,2.977
17,self_attn.k_proj,0.0000133368,0.05000,4.867
17,self_attn.v_proj,0.0000026553,0.05000,4.914
17,self_attn.q_proj,0.0000245140,0.05000,4.957
17,self_attn.o_proj,0.0000001955,0.05000,1.024
17,mlp.up_proj,0.0000118180,0.05000,2.375
17,mlp.gate_proj,0.0000168303,0.05000,2.379
17,mlp.down_proj,0.0000003665,0.05000,2.947
18,self_attn.k_proj,0.0000160067,0.05000,4.867
18,self_attn.q_proj,0.0000288517,0.05000,4.866
18,self_attn.v_proj,0.0000034172,0.05000,4.914
18,self_attn.o_proj,0.0000001835,0.05000,1.004
18,mlp.gate_proj,0.0000170495,0.05000,2.334
18,mlp.up_proj,0.0000122552,0.05000,2.338
18,mlp.down_proj,0.0000003414,0.05000,2.982
19,self_attn.q_proj,0.0000253104,0.05000,4.907
19,self_attn.k_proj,0.0000146613,0.05000,4.945
19,self_attn.v_proj,0.0000032850,0.05000,4.955
19,self_attn.o_proj,0.0000002477,0.05000,1.016
19,mlp.up_proj,0.0000119743,0.05000,2.340
19,mlp.gate_proj,0.0000164426,0.05000,2.355
19,mlp.down_proj,0.0000004131,0.05000,2.901
20,self_attn.k_proj,0.0000152328,0.05000,4.858
20,self_attn.v_proj,0.0000038224,0.05000,4.910
20,self_attn.q_proj,0.0000253048,0.05000,4.936
20,self_attn.o_proj,0.0000002420,0.05000,1.045
20,mlp.up_proj,0.0000115511,0.05000,2.373
20,mlp.gate_proj,0.0000150089,0.05000,2.387
20,mlp.down_proj,0.0000003303,0.05000,2.899
21,self_attn.v_proj,0.0000050870,0.05000,4.867
21,self_attn.q_proj,0.0000256488,0.05000,4.907
21,self_attn.k_proj,0.0000151884,0.05000,4.927
21,self_attn.o_proj,0.0000002438,0.05000,0.994
21,mlp.gate_proj,0.0000151286,0.05000,2.249
21,mlp.up_proj,0.0000114986,0.05000,2.266
21,mlp.down_proj,0.0000003446,0.05000,2.943
22,self_attn.v_proj,0.0000047463,0.05000,4.865
22,self_attn.k_proj,0.0000134800,0.05000,4.898
22,self_attn.q_proj,0.0000235887,0.05000,4.917
22,self_attn.o_proj,0.0000002382,0.05000,1.044
22,mlp.up_proj,0.0000117654,0.05000,2.241
22,mlp.gate_proj,0.0000155538,0.05000,2.263
22,mlp.down_proj,0.0000003936,0.05000,2.922
23,self_attn.q_proj,0.0000251569,0.05000,4.883
23,self_attn.k_proj,0.0000155190,0.05000,4.924
23,self_attn.v_proj,0.0000047481,0.05000,4.948
23,self_attn.o_proj,0.0000002655,0.05000,0.997
23,mlp.up_proj,0.0000127930,0.05000,2.364
23,mlp.gate_proj,0.0000180163,0.05000,2.367
23,mlp.down_proj,0.0000005834,0.05000,2.914
24,self_attn.v_proj,0.0000067991,0.05000,4.847
24,self_attn.k_proj,0.0000161390,0.05000,4.889
24,self_attn.q_proj,0.0000259698,0.05000,4.929
24,self_attn.o_proj,0.0000007835,0.05000,1.002
24,mlp.up_proj,0.0000131828,0.05000,2.274
24,mlp.gate_proj,0.0000190814,0.05000,2.282
24,mlp.down_proj,0.0000006735,0.05000,2.910
25,self_attn.q_proj,0.0000204543,0.05000,4.784
25,self_attn.v_proj,0.0000046968,0.05000,4.855
25,self_attn.k_proj,0.0000104107,0.05000,4.872
25,self_attn.o_proj,0.0000006803,0.05000,1.025
25,mlp.up_proj,0.0000139469,0.05000,2.367
25,mlp.gate_proj,0.0000205439,0.05000,2.381
25,mlp.down_proj,0.0000011351,0.05000,3.000
26,self_attn.v_proj,0.0000073377,0.05000,4.803
26,self_attn.q_proj,0.0000218441,0.05000,4.932
26,self_attn.k_proj,0.0000136581,0.05000,4.972
26,self_attn.o_proj,0.0000008289,0.05000,1.021
26,mlp.up_proj,0.0000149937,0.05000,2.296
26,mlp.gate_proj,0.0000223784,0.05000,2.312
26,mlp.down_proj,0.0000026421,0.05000,2.993
27,self_attn.q_proj,0.0000155607,0.05000,4.809
27,self_attn.v_proj,0.0000045720,0.05000,4.870
27,self_attn.k_proj,0.0000087533,0.05000,4.896
27,self_attn.o_proj,0.0000035290,0.05000,1.047
27,mlp.up_proj,0.0000168011,0.05000,2.397
27,mlp.gate_proj,0.0000220308,0.05000,2.413
27,mlp.down_proj,0.0000244650,0.05000,2.948