File size: 11,667 Bytes
bc97a36 2511587 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0000004658,0.05000,5.010
0,self_attn.v_proj,0.0000001105,0.05000,5.075
0,self_attn.k_proj,0.0000001265,0.05000,5.110
0,self_attn.o_proj,0.0000002945,0.05000,1.782
0,mlp.gate_proj,0.0000289746,0.05000,2.652
0,mlp.up_proj,0.0000252975,0.05000,2.701
0,mlp.down_proj,0.0000037377,0.05000,4.487
1,self_attn.v_proj,0.0000002128,0.05000,5.102
1,self_attn.k_proj,0.0000002043,0.05000,5.172
1,self_attn.q_proj,0.0000007588,0.05000,5.212
1,self_attn.o_proj,0.0000003918,0.05000,1.697
1,mlp.up_proj,0.0004287361,0.05000,2.695
1,mlp.gate_proj,0.0009483043,0.05000,2.728
1,mlp.down_proj,0.0000043074,0.05000,4.332
2,self_attn.q_proj,0.0000021343,0.05000,5.001
2,self_attn.v_proj,0.0000005962,0.05000,5.076
2,self_attn.k_proj,0.0000006101,0.05000,5.116
2,self_attn.o_proj,0.0000005367,0.05000,1.751
2,mlp.gate_proj,0.0015602610,0.05000,2.559
2,mlp.up_proj,0.0012285131,0.05000,2.596
2,mlp.down_proj,0.0000029970,0.05000,4.283
3,self_attn.q_proj,0.0000041543,0.05000,5.029
3,self_attn.v_proj,0.0000011004,0.05000,5.103
3,self_attn.k_proj,0.0000010634,0.05000,5.118
3,self_attn.o_proj,0.0000007820,0.05000,1.644
3,mlp.up_proj,0.0008446519,0.05000,2.626
3,mlp.gate_proj,0.0016393722,0.05000,2.648
3,mlp.down_proj,0.0000086119,0.05000,4.336
4,self_attn.k_proj,0.0000021310,0.05000,5.020
4,self_attn.q_proj,0.0000078954,0.05000,5.044
4,self_attn.v_proj,0.0000021595,0.05000,5.111
4,self_attn.o_proj,0.0000012900,0.05000,1.737
4,mlp.up_proj,0.0005500840,0.05000,2.736
4,mlp.gate_proj,0.0013762149,0.05000,2.738
4,mlp.down_proj,0.0000128145,0.05000,4.359
5,self_attn.q_proj,0.0000086130,0.05000,4.662
5,self_attn.v_proj,0.0000023674,0.05000,4.759
5,self_attn.k_proj,0.0000021791,0.05000,4.780
5,self_attn.o_proj,0.0000023793,0.05000,1.532
5,mlp.up_proj,0.0002447622,0.05000,2.342
5,mlp.gate_proj,0.0005030251,0.05000,2.389
5,mlp.down_proj,0.0000185442,0.05000,3.948
6,self_attn.k_proj,0.0000044010,0.05000,4.673
6,self_attn.v_proj,0.0000050325,0.05000,4.750
6,self_attn.q_proj,0.0000186530,0.05000,4.770
6,self_attn.o_proj,0.0000058681,0.05000,1.525
6,mlp.gate_proj,0.0005370448,0.05000,2.268
6,mlp.up_proj,0.0003059179,0.05000,2.269
6,mlp.down_proj,0.0009229853,0.05000,4.023
7,self_attn.q_proj,0.0000376636,0.05000,4.710
7,self_attn.k_proj,0.0000101153,0.05000,4.744
7,self_attn.v_proj,0.0000104857,0.05000,4.764
7,self_attn.o_proj,0.0000067601,0.05000,1.529
7,mlp.up_proj,0.0003681960,0.05000,2.167
7,mlp.gate_proj,0.0006117455,0.05000,2.191
7,mlp.down_proj,0.0000305151,0.05000,3.961
8,self_attn.k_proj,0.0000149234,0.05000,4.718
8,self_attn.v_proj,0.0000164171,0.05000,4.838
8,self_attn.q_proj,0.0000576840,0.05000,4.853
8,self_attn.o_proj,0.0000096945,0.05000,1.528
8,mlp.gate_proj,0.0004354699,0.05000,2.192
8,mlp.up_proj,0.0003553537,0.05000,2.220
8,mlp.down_proj,0.0000482470,0.05000,3.933
9,self_attn.q_proj,0.0000708480,0.05000,4.591
9,self_attn.v_proj,0.0000198840,0.05000,4.660
9,self_attn.k_proj,0.0000197770,0.05000,4.679
9,self_attn.o_proj,0.0000112224,0.05000,1.536
9,mlp.up_proj,0.0004471922,0.05000,2.262
9,mlp.gate_proj,0.0007011541,0.05000,2.280
9,mlp.down_proj,0.0000475860,0.05000,4.021
10,self_attn.v_proj,0.0000316658,0.05000,4.767
10,self_attn.k_proj,0.0000271843,0.05000,4.818
10,self_attn.q_proj,0.0001055211,0.05000,4.854
10,self_attn.o_proj,0.0000176322,0.05000,1.520
10,mlp.up_proj,0.0003787521,0.05000,2.269
10,mlp.gate_proj,0.0005428473,0.05000,2.288
10,mlp.down_proj,0.0000400349,0.05000,4.012
11,self_attn.v_proj,0.0000144231,0.05000,4.735
11,self_attn.k_proj,0.0000135963,0.05000,4.801
11,self_attn.q_proj,0.0000505602,0.05000,4.845
11,self_attn.o_proj,0.0000114296,0.05000,1.576
11,mlp.up_proj,0.0003493419,0.05000,2.223
11,mlp.gate_proj,0.0004413690,0.05000,2.253
11,mlp.down_proj,0.0000366131,0.05000,4.017
12,self_attn.k_proj,0.0000153139,0.05000,4.535
12,self_attn.v_proj,0.0000167666,0.05000,4.586
12,self_attn.q_proj,0.0000584962,0.05000,4.616
12,self_attn.o_proj,0.0000125013,0.05000,1.622
12,mlp.gate_proj,0.0003892739,0.05000,2.209
12,mlp.up_proj,0.0003392469,0.05000,2.254
12,mlp.down_proj,0.0000370598,0.05000,4.058
13,self_attn.k_proj,0.0000116586,0.05000,4.696
13,self_attn.q_proj,0.0000443353,0.05000,4.778
13,self_attn.v_proj,0.0000114623,0.05000,4.786
13,self_attn.o_proj,0.0000097119,0.05000,1.567
13,mlp.gate_proj,0.0003625256,0.05000,2.177
13,mlp.up_proj,0.0003459045,0.05000,2.200
13,mlp.down_proj,0.0000401894,0.05000,3.992
14,self_attn.k_proj,0.0000190907,0.05000,4.754
14,self_attn.v_proj,0.0000204954,0.05000,4.793
14,self_attn.q_proj,0.0000746915,0.05000,4.818
14,self_attn.o_proj,0.0000171502,0.05000,1.543
14,mlp.up_proj,0.0003421074,0.05000,2.281
14,mlp.gate_proj,0.0003563725,0.05000,2.298
14,mlp.down_proj,0.0000358082,0.05000,4.040
15,self_attn.v_proj,0.0000185207,0.05000,4.646
15,self_attn.k_proj,0.0000190547,0.05000,4.731
15,self_attn.q_proj,0.0000740901,0.05000,4.774
15,self_attn.o_proj,0.0000141826,0.05000,1.478
15,mlp.gate_proj,0.0003278327,0.05000,2.248
15,mlp.up_proj,0.0003265059,0.05000,2.267
15,mlp.down_proj,0.0000344754,0.05000,4.042
16,self_attn.k_proj,0.0000315280,0.05000,4.783
16,self_attn.v_proj,0.0000365092,0.05000,4.807
16,self_attn.q_proj,0.0001286270,0.05000,4.856
16,self_attn.o_proj,0.0000152998,0.05000,1.529
16,mlp.up_proj,0.0003464136,0.05000,2.168
16,mlp.gate_proj,0.0003686556,0.05000,2.192
16,mlp.down_proj,0.0003761285,0.05000,4.012
17,self_attn.v_proj,0.0000274548,0.05000,4.671
17,self_attn.q_proj,0.0001093356,0.05000,4.719
17,self_attn.k_proj,0.0000240166,0.05000,4.734
17,self_attn.o_proj,0.0000160825,0.05000,1.532
17,mlp.up_proj,0.0003082692,0.05000,2.155
17,mlp.gate_proj,0.0003231052,0.05000,2.188
17,mlp.down_proj,0.0000337791,0.05000,4.057
18,self_attn.v_proj,0.0000330313,0.05000,4.643
18,self_attn.q_proj,0.0001190752,0.05000,4.730
18,self_attn.k_proj,0.0000287658,0.05000,4.754
18,self_attn.o_proj,0.0000195600,0.05000,1.516
18,mlp.up_proj,0.0003365247,0.05000,2.106
18,mlp.gate_proj,0.0003462859,0.05000,2.140
18,mlp.down_proj,0.0000416005,0.05000,3.978
19,self_attn.q_proj,0.0002362754,0.05000,4.728
19,self_attn.v_proj,0.0000574272,0.05000,4.794
19,self_attn.k_proj,0.0000544455,0.05000,4.836
19,self_attn.o_proj,0.0000249074,0.05000,1.527
19,mlp.up_proj,0.0003698838,0.05000,2.291
19,mlp.gate_proj,0.0003773468,0.05000,2.316
19,mlp.down_proj,0.0000530812,0.05000,3.979
20,self_attn.q_proj,0.0002188809,0.05000,4.637
20,self_attn.k_proj,0.0000471045,0.05000,4.642
20,self_attn.v_proj,0.0000521005,0.05000,4.717
20,self_attn.o_proj,0.0000248449,0.05000,1.551
20,mlp.up_proj,0.0004029941,0.05000,2.137
20,mlp.gate_proj,0.0004013012,0.05000,2.158
20,mlp.down_proj,0.0000585520,0.05000,4.006
21,self_attn.v_proj,0.0000706983,0.05000,4.767
21,self_attn.k_proj,0.0000634547,0.05000,4.821
21,self_attn.q_proj,0.0002828724,0.05000,4.833
21,self_attn.o_proj,0.0000280543,0.05000,1.523
21,mlp.up_proj,0.0004820233,0.05000,2.173
21,mlp.gate_proj,0.0004849778,0.05000,2.193
21,mlp.down_proj,0.0000768389,0.05000,3.981
22,self_attn.v_proj,0.0001434762,0.05000,4.732
22,self_attn.k_proj,0.0001253538,0.05000,4.819
22,self_attn.q_proj,0.0005631076,0.05000,4.846
22,self_attn.o_proj,0.0000553381,0.05000,1.557
22,mlp.up_proj,0.0005509696,0.05000,2.289
22,mlp.gate_proj,0.0005793622,0.05000,2.321
22,mlp.down_proj,0.0001229682,0.05000,4.013
23,self_attn.k_proj,0.0001267572,0.05000,4.744
23,self_attn.q_proj,0.0005634705,0.05000,4.804
23,self_attn.v_proj,0.0001490576,0.05000,4.816
23,self_attn.o_proj,0.0000699550,0.05000,1.576
23,mlp.up_proj,0.0006518718,0.05000,2.161
23,mlp.gate_proj,0.0007231190,0.05000,2.181
23,mlp.down_proj,0.0001715308,0.05000,4.017
24,self_attn.k_proj,0.0002084854,0.05000,4.662
24,self_attn.q_proj,0.0009156117,0.05000,4.669
24,self_attn.v_proj,0.0002740377,0.05000,4.713
24,self_attn.o_proj,0.0000720820,0.05000,1.559
24,mlp.up_proj,0.0007225850,0.05000,2.197
24,mlp.gate_proj,0.0008156972,0.05000,2.225
24,mlp.down_proj,0.0002084210,0.05000,3.929
25,self_attn.k_proj,0.0001581930,0.05000,4.615
25,self_attn.v_proj,0.0001814060,0.05000,4.679
25,self_attn.q_proj,0.0006456851,0.05000,4.706
25,self_attn.o_proj,0.0000473329,0.05000,1.560
25,mlp.up_proj,0.0008242684,0.05000,2.142
25,mlp.gate_proj,0.0009428286,0.05000,2.172
25,mlp.down_proj,0.0002486016,0.05000,3.990
26,self_attn.k_proj,0.0002404927,0.05000,4.621
26,self_attn.v_proj,0.0002888501,0.05000,4.642
26,self_attn.q_proj,0.0010402773,0.05000,4.699
26,self_attn.o_proj,0.0000435772,0.05000,1.576
26,mlp.gate_proj,0.0011219122,0.05000,2.293
26,mlp.up_proj,0.0010119856,0.05000,2.308
26,mlp.down_proj,0.0002938876,0.05000,3.967
27,self_attn.q_proj,0.0012800641,0.05000,4.697
27,self_attn.v_proj,0.0003850639,0.05000,4.761
27,self_attn.k_proj,0.0002874724,0.05000,4.777
27,self_attn.o_proj,0.0000617831,0.05000,1.501
27,mlp.gate_proj,0.0012472775,0.05000,2.290
27,mlp.up_proj,0.0011642600,0.05000,2.332
27,mlp.down_proj,0.0004163454,0.05000,3.981
28,self_attn.k_proj,0.0003459948,0.05000,4.645
28,self_attn.q_proj,0.0013993718,0.05000,4.743
28,self_attn.v_proj,0.0004060790,0.05000,4.769
28,self_attn.o_proj,0.0000979789,0.05000,1.566
28,mlp.up_proj,0.0013726200,0.05000,2.287
28,mlp.gate_proj,0.0014115914,0.05000,2.297
28,mlp.down_proj,0.0006263638,0.05000,4.001
29,self_attn.k_proj,0.0007600260,0.05000,4.717
29,self_attn.v_proj,0.0010328331,0.05000,4.782
29,self_attn.q_proj,0.0033356990,0.05000,4.817
29,self_attn.o_proj,0.0000782954,0.05000,1.573
29,mlp.up_proj,0.0016839991,0.05000,2.209
29,mlp.gate_proj,0.0016476954,0.05000,2.240
29,mlp.down_proj,0.0006718912,0.05000,4.071
30,self_attn.k_proj,0.0009500644,0.05000,4.648
30,self_attn.q_proj,0.0037828581,0.05000,4.691
30,self_attn.v_proj,0.0012155685,0.05000,4.745
30,self_attn.o_proj,0.0001820555,0.05000,1.580
30,mlp.gate_proj,0.0017180550,0.05000,2.131
30,mlp.up_proj,0.0018161121,0.05000,2.136
30,mlp.down_proj,0.0009175093,0.05000,4.057
31,self_attn.q_proj,0.0047508813,0.05000,4.746
31,self_attn.k_proj,0.0012552110,0.05000,4.801
31,self_attn.v_proj,0.0017514399,0.05000,4.843
31,self_attn.o_proj,0.0001683568,0.05000,1.558
31,mlp.gate_proj,0.0016933631,0.05000,2.218
31,mlp.up_proj,0.0018819273,0.05000,2.249
31,mlp.down_proj,0.0011198272,0.05000,4.011
32,self_attn.q_proj,0.0068592699,0.05000,4.781
32,self_attn.v_proj,0.0025475807,0.05000,4.837
32,self_attn.k_proj,0.0016688757,0.05000,4.866
32,self_attn.o_proj,0.0001884213,0.05000,1.525
32,mlp.up_proj,0.0019701249,0.05000,2.197
32,mlp.gate_proj,0.0017300433,0.05000,2.227
32,mlp.down_proj,0.0013363534,0.05000,4.113
33,self_attn.q_proj,0.0142963547,0.05000,4.643
33,self_attn.k_proj,0.0029161619,0.05000,4.714
33,self_attn.v_proj,0.0055924580,0.05000,4.730
33,self_attn.o_proj,0.0002554161,0.05000,1.524
33,mlp.up_proj,0.0021203640,0.05000,2.182
33,mlp.gate_proj,0.0018360139,0.05000,2.221
33,mlp.down_proj,0.0017152019,0.05000,4.001
34,self_attn.q_proj,0.0111581404,0.05000,4.838
34,self_attn.v_proj,0.0041854484,0.05000,4.842
34,self_attn.k_proj,0.0024777448,0.05000,4.885
34,self_attn.o_proj,0.0005260598,0.05000,1.526
34,mlp.gate_proj,0.0021799901,0.05000,2.246
34,mlp.up_proj,0.0023547660,0.05000,2.265
34,mlp.down_proj,0.0020657629,0.05000,4.042
35,self_attn.q_proj,0.0052947048,0.05000,4.801
35,self_attn.k_proj,0.0013647166,0.05000,4.871
35,self_attn.v_proj,0.0018166249,0.05000,4.910
35,self_attn.o_proj,0.0007476909,0.05000,1.543
35,mlp.up_proj,0.0045160744,0.05000,2.199
35,mlp.gate_proj,0.0039883456,0.05000,2.221
35,mlp.down_proj,0.0036397912,0.05000,4.071
|