File size: 9,067 Bytes
c5fdea3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
layer,module,loss,samples,damp,time
0,self_attn.k_proj,0.0000074878,0.05000,5.659
0,self_attn.q_proj,0.0000147233,0.05000,5.662
0,self_attn.v_proj,0.0000004164,0.05000,5.716
0,self_attn.o_proj,0.0000000188,0.05000,1.023
0,mlp.up_proj,0.0000073224,0.05000,2.317
0,mlp.gate_proj,0.0000083827,0.05000,2.350
0,mlp.down_proj,0.0000001028,0.05000,2.886
1,self_attn.k_proj,0.0000103207,0.05000,5.596
1,self_attn.q_proj,0.0000175984,0.05000,5.661
1,self_attn.v_proj,0.0000011597,0.05000,5.641
1,self_attn.o_proj,0.0000000697,0.05000,1.044
1,mlp.up_proj,0.0000099559,0.05000,2.788
1,mlp.gate_proj,0.0000113476,0.05000,2.853
1,mlp.down_proj,0.0000025962,0.05000,3.085
2,self_attn.q_proj,0.0000739263,0.05000,5.511
2,self_attn.k_proj,0.0000441155,0.05000,5.576
2,self_attn.v_proj,0.0000044463,0.05000,5.601
2,self_attn.o_proj,0.0000000672,0.05000,1.023
2,mlp.gate_proj,0.0000187338,0.05000,2.298
2,mlp.up_proj,0.0000158255,0.05000,2.314
2,mlp.down_proj,0.0000003259,0.05000,3.034
3,self_attn.k_proj,0.0000269141,0.05000,4.896
3,self_attn.v_proj,0.0000044659,0.05000,4.894
3,self_attn.q_proj,0.0000484370,0.05000,4.953
3,self_attn.o_proj,0.0000001267,0.05000,1.037
3,mlp.gate_proj,0.0000267179,0.05000,2.337
3,mlp.up_proj,0.0000199634,0.05000,2.363
3,mlp.down_proj,0.0000004599,0.05000,2.975
4,self_attn.k_proj,0.0000258326,0.05000,4.882
4,self_attn.v_proj,0.0000047591,0.05000,4.884
4,self_attn.q_proj,0.0000493107,0.05000,4.944
4,self_attn.o_proj,0.0000001978,0.05000,1.044
4,mlp.gate_proj,0.0000357187,0.05000,2.398
4,mlp.up_proj,0.0000239637,0.05000,2.397
4,mlp.down_proj,0.0000006565,0.05000,2.979
5,self_attn.k_proj,0.0000427736,0.05000,4.861
5,self_attn.q_proj,0.0000706749,0.05000,4.928
5,self_attn.v_proj,0.0000049181,0.05000,4.948
5,self_attn.o_proj,0.0000002486,0.05000,1.045
5,mlp.gate_proj,0.0000385495,0.05000,2.308
5,mlp.up_proj,0.0000273058,0.05000,2.328
5,mlp.down_proj,0.0000008979,0.05000,3.011
6,self_attn.k_proj,0.0000308670,0.05000,4.806
6,self_attn.q_proj,0.0000581398,0.05000,4.846
6,self_attn.v_proj,0.0000045627,0.05000,4.885
6,self_attn.o_proj,0.0000003760,0.05000,1.026
6,mlp.up_proj,0.0000291855,0.05000,2.248
6,mlp.gate_proj,0.0000411316,0.05000,2.276
6,mlp.down_proj,0.0000010291,0.05000,3.053
7,self_attn.v_proj,0.0000040204,0.05000,4.955
7,self_attn.q_proj,0.0000492176,0.05000,5.028
7,self_attn.k_proj,0.0000289958,0.05000,5.047
7,self_attn.o_proj,0.0000005308,0.05000,1.029
7,mlp.up_proj,0.0000311719,0.05000,2.240
7,mlp.gate_proj,0.0000403425,0.05000,2.263
7,mlp.down_proj,0.0000011793,0.05000,2.982
8,self_attn.q_proj,0.0000629039,0.05000,4.885
8,self_attn.v_proj,0.0000050514,0.05000,4.884
8,self_attn.k_proj,0.0000379682,0.05000,4.907
8,self_attn.o_proj,0.0000006740,0.05000,1.021
8,mlp.gate_proj,0.0000429643,0.05000,2.269
8,mlp.up_proj,0.0000325408,0.05000,2.289
8,mlp.down_proj,0.0000012385,0.05000,2.952
9,self_attn.k_proj,0.0000347599,0.05000,4.724
9,self_attn.v_proj,0.0000061279,0.05000,4.800
9,self_attn.q_proj,0.0000586902,0.05000,4.826
9,self_attn.o_proj,0.0000007288,0.05000,1.020
9,mlp.up_proj,0.0000325744,0.05000,2.317
9,mlp.gate_proj,0.0000417335,0.05000,2.339
9,mlp.down_proj,0.0000012440,0.05000,2.978
10,self_attn.k_proj,0.0000367776,0.05000,4.779
10,self_attn.v_proj,0.0000047437,0.05000,4.837
10,self_attn.q_proj,0.0000589483,0.05000,4.863
10,self_attn.o_proj,0.0000007045,0.05000,1.006
10,mlp.up_proj,0.0000355694,0.05000,2.252
10,mlp.gate_proj,0.0000426607,0.05000,2.269
10,mlp.down_proj,0.0000014252,0.05000,2.945
11,self_attn.v_proj,0.0000056424,0.05000,4.732
11,self_attn.k_proj,0.0000276094,0.05000,4.769
11,self_attn.q_proj,0.0000490194,0.05000,4.803
11,self_attn.o_proj,0.0000008918,0.05000,1.040
11,mlp.gate_proj,0.0000447233,0.05000,2.266
11,mlp.up_proj,0.0000384965,0.05000,2.282
11,mlp.down_proj,0.0000016130,0.05000,2.992
12,self_attn.v_proj,0.0000062276,0.05000,4.821
12,self_attn.q_proj,0.0000707292,0.05000,4.844
12,self_attn.k_proj,0.0000422481,0.05000,4.875
12,self_attn.o_proj,0.0000010586,0.05000,1.013
12,mlp.up_proj,0.0000412534,0.05000,2.241
12,mlp.gate_proj,0.0000475553,0.05000,2.264
12,mlp.down_proj,0.0000018156,0.05000,2.954
13,self_attn.q_proj,0.0000746849,0.05000,4.776
13,self_attn.k_proj,0.0000481807,0.05000,4.811
13,self_attn.v_proj,0.0000073153,0.05000,4.843
13,self_attn.o_proj,0.0000012458,0.05000,1.019
13,mlp.up_proj,0.0000458390,0.05000,2.347
13,mlp.gate_proj,0.0000565005,0.05000,2.372
13,mlp.down_proj,0.0000024355,0.05000,2.979
14,self_attn.k_proj,0.0000378307,0.05000,4.768
14,self_attn.q_proj,0.0000819438,0.05000,4.881
14,self_attn.v_proj,0.0000085098,0.05000,4.895
14,self_attn.o_proj,0.0000014740,0.05000,1.037
14,mlp.up_proj,0.0000496886,0.05000,2.248
14,mlp.gate_proj,0.0000617393,0.05000,2.267
14,mlp.down_proj,0.0000029373,0.05000,2.928
15,self_attn.v_proj,0.0000085283,0.05000,4.719
15,self_attn.k_proj,0.0000434987,0.05000,4.767
15,self_attn.q_proj,0.0000836970,0.05000,4.813
15,self_attn.o_proj,0.0000009290,0.05000,1.022
15,mlp.gate_proj,0.0000665099,0.05000,2.268
15,mlp.up_proj,0.0000498216,0.05000,2.299
15,mlp.down_proj,0.0000030499,0.05000,2.931
16,self_attn.q_proj,0.0000871859,0.05000,4.742
16,self_attn.k_proj,0.0000490420,0.05000,4.803
16,self_attn.v_proj,0.0000096564,0.05000,4.838
16,self_attn.o_proj,0.0000006811,0.05000,1.031
16,mlp.gate_proj,0.0000679251,0.05000,2.348
16,mlp.up_proj,0.0000497399,0.05000,2.371
16,mlp.down_proj,0.0000029397,0.05000,2.928
17,self_attn.q_proj,0.0000832187,0.05000,4.610
17,self_attn.v_proj,0.0000093413,0.05000,4.761
17,self_attn.k_proj,0.0000452292,0.05000,4.788
17,self_attn.o_proj,0.0000006960,0.05000,1.055
17,mlp.up_proj,0.0000515798,0.05000,2.270
17,mlp.gate_proj,0.0000716405,0.05000,2.285
17,mlp.down_proj,0.0000032355,0.05000,2.951
18,self_attn.v_proj,0.0000116876,0.05000,4.763
18,self_attn.k_proj,0.0000521751,0.05000,4.815
18,self_attn.q_proj,0.0000943415,0.05000,4.836
18,self_attn.o_proj,0.0000007594,0.05000,1.024
18,mlp.gate_proj,0.0000745484,0.05000,2.296
18,mlp.up_proj,0.0000550380,0.05000,2.313
18,mlp.down_proj,0.0000034218,0.05000,2.943
19,self_attn.q_proj,0.0000838091,0.05000,4.770
19,self_attn.v_proj,0.0000114893,0.05000,4.822
19,self_attn.k_proj,0.0000487867,0.05000,4.842
19,self_attn.o_proj,0.0000009830,0.05000,1.065
19,mlp.gate_proj,0.0000804051,0.05000,2.317
19,mlp.up_proj,0.0000600660,0.05000,2.329
19,mlp.down_proj,0.0000042528,0.05000,2.947
20,self_attn.q_proj,0.0000873402,0.05000,4.741
20,self_attn.v_proj,0.0000137784,0.05000,4.810
20,self_attn.k_proj,0.0000528894,0.05000,4.846
20,self_attn.o_proj,0.0000007206,0.05000,1.018
20,mlp.gate_proj,0.0000781699,0.05000,2.336
20,mlp.up_proj,0.0000613666,0.05000,2.355
20,mlp.down_proj,0.0000041109,0.05000,2.994
21,self_attn.q_proj,0.0000868639,0.05000,4.764
21,self_attn.k_proj,0.0000519479,0.05000,4.812
21,self_attn.v_proj,0.0000178721,0.05000,4.847
21,self_attn.o_proj,0.0000008751,0.05000,1.000
21,mlp.up_proj,0.0000650703,0.05000,2.354
21,mlp.gate_proj,0.0000835892,0.05000,2.381
21,mlp.down_proj,0.0000043956,0.05000,2.923
22,self_attn.v_proj,0.0000177718,0.05000,4.708
22,self_attn.k_proj,0.0000496925,0.05000,4.778
22,self_attn.q_proj,0.0000848907,0.05000,4.811
22,self_attn.o_proj,0.0000007073,0.05000,1.012
22,mlp.up_proj,0.0000702587,0.05000,2.325
22,mlp.gate_proj,0.0000909208,0.05000,2.340
22,mlp.down_proj,0.0000049920,0.05000,2.933
23,self_attn.v_proj,0.0000174651,0.05000,4.746
23,self_attn.q_proj,0.0000855378,0.05000,4.796
23,self_attn.k_proj,0.0000534188,0.05000,4.820
23,self_attn.o_proj,0.0000011784,0.05000,1.016
23,mlp.gate_proj,0.0001065173,0.05000,2.276
23,mlp.up_proj,0.0000778496,0.05000,2.298
23,mlp.down_proj,0.0000061454,0.05000,2.960
24,self_attn.v_proj,0.0000258239,0.05000,4.777
24,self_attn.k_proj,0.0000595539,0.05000,4.808
24,self_attn.q_proj,0.0000942610,0.05000,4.838
24,self_attn.o_proj,0.0000017130,0.05000,1.006
24,mlp.up_proj,0.0000840494,0.05000,2.249
24,mlp.gate_proj,0.0001173177,0.05000,2.264
24,mlp.down_proj,0.0000069910,0.05000,2.990
25,self_attn.q_proj,0.0000879538,0.05000,4.745
25,self_attn.k_proj,0.0000456781,0.05000,4.820
25,self_attn.v_proj,0.0000218952,0.05000,4.851
25,self_attn.o_proj,0.0000017481,0.05000,1.007
25,mlp.gate_proj,0.0001278075,0.05000,2.386
25,mlp.up_proj,0.0000906748,0.05000,2.420
25,mlp.down_proj,0.0000087553,0.05000,2.930
26,self_attn.v_proj,0.0000302323,0.05000,4.811
26,self_attn.q_proj,0.0000829504,0.05000,4.829
26,self_attn.k_proj,0.0000538115,0.05000,4.855
26,self_attn.o_proj,0.0000024757,0.05000,1.023
26,mlp.up_proj,0.0000947583,0.05000,2.376
26,mlp.gate_proj,0.0001355632,0.05000,2.382
26,mlp.down_proj,0.0000126745,0.05000,3.007
27,self_attn.k_proj,0.0000373252,0.05000,4.704
27,self_attn.v_proj,0.0000200833,0.05000,4.782
27,self_attn.q_proj,0.0000657733,0.05000,4.821
27,self_attn.o_proj,0.0000077384,0.05000,1.004
27,mlp.up_proj,0.0001052698,0.05000,2.324
27,mlp.gate_proj,0.0001328537,0.05000,2.335
27,mlp.down_proj,0.0000376315,0.05000,2.955