File size: 11,667 Bytes
b5a6aa7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | layer,module,loss,samples,damp,time
0,self_attn.k_proj,0.0000003594,0.05000,4.072
0,self_attn.v_proj,0.0000003196,0.05000,4.159
0,self_attn.q_proj,0.0000013323,0.05000,4.189
0,self_attn.o_proj,0.0000008583,0.05000,1.501
0,mlp.gate_proj,0.0000935746,0.05000,2.208
0,mlp.up_proj,0.0000823376,0.05000,2.229
0,mlp.down_proj,0.0000093483,0.05000,3.781
1,self_attn.k_proj,0.0000005827,0.05000,4.169
1,self_attn.q_proj,0.0000022050,0.05000,4.270
1,self_attn.v_proj,0.0000006061,0.05000,4.286
1,self_attn.o_proj,0.0000013489,0.05000,1.449
1,mlp.up_proj,0.0012403375,0.05000,1.993
1,mlp.gate_proj,0.0027883543,0.05000,2.016
1,mlp.down_proj,0.0000149858,0.05000,3.810
2,self_attn.k_proj,0.0000017745,0.05000,4.243
2,self_attn.q_proj,0.0000062511,0.05000,4.262
2,self_attn.v_proj,0.0000017546,0.05000,4.358
2,self_attn.o_proj,0.0000015581,0.05000,1.491
2,mlp.up_proj,0.0037729626,0.05000,2.027
2,mlp.gate_proj,0.0049998074,0.05000,2.042
2,mlp.down_proj,0.0000086235,0.05000,3.796
3,self_attn.k_proj,0.0000031761,0.05000,4.226
3,self_attn.q_proj,0.0000124558,0.05000,4.254
3,self_attn.v_proj,0.0000033006,0.05000,4.299
3,self_attn.o_proj,0.0000027714,0.05000,1.471
3,mlp.up_proj,0.0025942083,0.05000,2.045
3,mlp.gate_proj,0.0050245722,0.05000,2.055
3,mlp.down_proj,0.0000275649,0.05000,3.933
4,self_attn.q_proj,0.0000246425,0.05000,4.286
4,self_attn.k_proj,0.0000067264,0.05000,4.288
4,self_attn.v_proj,0.0000068391,0.05000,4.351
4,self_attn.o_proj,0.0000045797,0.05000,1.461
4,mlp.gate_proj,0.0047045770,0.05000,2.064
4,mlp.up_proj,0.0019233117,0.05000,2.079
4,mlp.down_proj,0.0000489517,0.05000,3.845
5,self_attn.v_proj,0.0000077134,0.05000,4.167
5,self_attn.q_proj,0.0000278327,0.05000,4.278
5,self_attn.k_proj,0.0000069916,0.05000,4.305
5,self_attn.o_proj,0.0000070910,0.05000,1.466
5,mlp.up_proj,0.0008793870,0.05000,1.962
5,mlp.gate_proj,0.0017394822,0.05000,1.991
5,mlp.down_proj,0.0000607402,0.05000,3.847
6,self_attn.v_proj,0.0000168110,0.05000,4.083
6,self_attn.k_proj,0.0000146507,0.05000,4.192
6,self_attn.q_proj,0.0000625633,0.05000,4.207
6,self_attn.o_proj,0.0000228399,0.05000,1.457
6,mlp.up_proj,0.0011648920,0.05000,1.999
6,mlp.gate_proj,0.0019183903,0.05000,2.030
6,mlp.down_proj,0.0103364748,0.05000,3.803
7,self_attn.q_proj,0.0001241763,0.05000,4.408
7,self_attn.v_proj,0.0000359717,0.05000,4.429
7,self_attn.k_proj,0.0000332411,0.05000,4.472
7,self_attn.o_proj,0.0000210265,0.05000,1.481
7,mlp.up_proj,0.0013953543,0.05000,2.171
7,mlp.gate_proj,0.0021643315,0.05000,2.191
7,mlp.down_proj,0.0000991852,0.05000,3.856
8,self_attn.q_proj,0.0001985875,0.05000,4.326
8,self_attn.v_proj,0.0000591547,0.05000,4.352
8,self_attn.k_proj,0.0000495564,0.05000,4.384
8,self_attn.o_proj,0.0000302952,0.05000,1.425
8,mlp.gate_proj,0.0016333003,0.05000,2.126
8,mlp.up_proj,0.0013568534,0.05000,2.144
8,mlp.down_proj,0.0001514721,0.05000,3.858
9,self_attn.q_proj,0.0002371635,0.05000,4.318
9,self_attn.k_proj,0.0000649721,0.05000,4.368
9,self_attn.v_proj,0.0000708001,0.05000,4.392
9,self_attn.o_proj,0.0000351153,0.05000,1.461
9,mlp.gate_proj,0.0026187571,0.05000,2.003
9,mlp.up_proj,0.0017171524,0.05000,2.035
9,mlp.down_proj,0.0001497422,0.05000,3.754
10,self_attn.q_proj,0.0003617231,0.05000,4.318
10,self_attn.k_proj,0.0000914477,0.05000,4.385
10,self_attn.v_proj,0.0001139594,0.05000,4.396
10,self_attn.o_proj,0.0000542123,0.05000,1.558
10,mlp.up_proj,0.0014414165,0.05000,2.250
10,mlp.gate_proj,0.0020228552,0.05000,2.267
10,mlp.down_proj,0.0001341613,0.05000,3.767
11,self_attn.q_proj,0.0001707963,0.05000,4.467
11,self_attn.v_proj,0.0000511366,0.05000,4.515
11,self_attn.k_proj,0.0000445927,0.05000,4.552
11,self_attn.o_proj,0.0000347715,0.05000,1.577
11,mlp.gate_proj,0.0016375363,0.05000,2.061
11,mlp.up_proj,0.0013137128,0.05000,2.088
11,mlp.down_proj,0.0001213440,0.05000,3.846
12,self_attn.q_proj,0.0001992994,0.05000,4.345
12,self_attn.v_proj,0.0000592262,0.05000,4.403
12,self_attn.k_proj,0.0000508667,0.05000,4.437
12,self_attn.o_proj,0.0000389608,0.05000,1.487
12,mlp.up_proj,0.0012556559,0.05000,2.126
12,mlp.gate_proj,0.0014193900,0.05000,2.153
12,mlp.down_proj,0.0001204108,0.05000,3.814
13,self_attn.k_proj,0.0000377959,0.05000,4.361
13,self_attn.v_proj,0.0000398980,0.05000,4.377
13,self_attn.q_proj,0.0001468130,0.05000,4.406
13,self_attn.o_proj,0.0000275558,0.05000,1.572
13,mlp.gate_proj,0.0013068827,0.05000,1.982
13,mlp.up_proj,0.0012688375,0.05000,1.992
13,mlp.down_proj,0.0001261197,0.05000,3.786
14,self_attn.k_proj,0.0000636807,0.05000,4.260
14,self_attn.q_proj,0.0002550139,0.05000,4.352
14,self_attn.v_proj,0.0000732032,0.05000,4.377
14,self_attn.o_proj,0.0000563386,0.05000,1.452
14,mlp.gate_proj,0.0012963092,0.05000,2.037
14,mlp.up_proj,0.0012702085,0.05000,2.074
14,mlp.down_proj,0.0001155589,0.05000,3.801
15,self_attn.k_proj,0.0000619257,0.05000,4.268
15,self_attn.v_proj,0.0000667899,0.05000,4.398
15,self_attn.q_proj,0.0002482918,0.05000,4.422
15,self_attn.o_proj,0.0000455493,0.05000,1.434
15,mlp.gate_proj,0.0011929697,0.05000,2.083
15,mlp.up_proj,0.0012112681,0.05000,2.113
15,mlp.down_proj,0.0001076081,0.05000,3.777
16,self_attn.v_proj,0.0001263295,0.05000,4.350
16,self_attn.q_proj,0.0004257370,0.05000,4.410
16,self_attn.k_proj,0.0001013038,0.05000,4.432
16,self_attn.o_proj,0.0000473602,0.05000,1.420
16,mlp.gate_proj,0.0013676042,0.05000,2.118
16,mlp.up_proj,0.0013057378,0.05000,2.139
16,mlp.down_proj,0.0007478101,0.05000,3.826
17,self_attn.q_proj,0.0003468467,0.05000,4.276
17,self_attn.k_proj,0.0000746891,0.05000,4.327
17,self_attn.v_proj,0.0000924036,0.05000,4.352
17,self_attn.o_proj,0.0000458996,0.05000,1.441
17,mlp.gate_proj,0.0011096764,0.05000,2.045
17,mlp.up_proj,0.0010861262,0.05000,2.062
17,mlp.down_proj,0.0000963411,0.05000,3.819
18,self_attn.q_proj,0.0003796826,0.05000,4.261
18,self_attn.v_proj,0.0001097278,0.05000,4.273
18,self_attn.k_proj,0.0000885717,0.05000,4.317
18,self_attn.o_proj,0.0000499444,0.05000,1.491
18,mlp.up_proj,0.0011784257,0.05000,1.997
18,mlp.gate_proj,0.0011817602,0.05000,2.042
18,mlp.down_proj,0.0001195181,0.05000,3.730
19,self_attn.k_proj,0.0001662553,0.05000,4.191
19,self_attn.q_proj,0.0007413589,0.05000,4.338
19,self_attn.v_proj,0.0001895868,0.05000,4.369
19,self_attn.o_proj,0.0000687233,0.05000,1.466
19,mlp.gate_proj,0.0012607713,0.05000,2.092
19,mlp.up_proj,0.0012681201,0.05000,2.113
19,mlp.down_proj,0.0001470167,0.05000,3.758
20,self_attn.k_proj,0.0001367688,0.05000,4.472
20,self_attn.q_proj,0.0006558189,0.05000,4.530
20,self_attn.v_proj,0.0001652159,0.05000,4.549
20,self_attn.o_proj,0.0000642542,0.05000,1.464
20,mlp.gate_proj,0.0013226730,0.05000,2.004
20,mlp.up_proj,0.0013584104,0.05000,2.027
20,mlp.down_proj,0.0001604849,0.05000,3.849
21,self_attn.v_proj,0.0002245796,0.05000,4.389
21,self_attn.q_proj,0.0008645666,0.05000,4.438
21,self_attn.k_proj,0.0001859100,0.05000,4.476
21,self_attn.o_proj,0.0000763351,0.05000,1.458
21,mlp.up_proj,0.0016042628,0.05000,2.163
21,mlp.gate_proj,0.0015679000,0.05000,2.186
21,mlp.down_proj,0.0002100836,0.05000,3.945
22,self_attn.q_proj,0.0016993005,0.05000,4.338
22,self_attn.k_proj,0.0003806636,0.05000,4.384
22,self_attn.v_proj,0.0004654586,0.05000,4.407
22,self_attn.o_proj,0.0001504707,0.05000,1.440
22,mlp.up_proj,0.0017626628,0.05000,2.070
22,mlp.gate_proj,0.0018012372,0.05000,2.091
22,mlp.down_proj,0.0003374911,0.05000,3.859
23,self_attn.k_proj,0.0003534277,0.05000,4.390
23,self_attn.v_proj,0.0004446394,0.05000,4.453
23,self_attn.q_proj,0.0016562948,0.05000,4.478
23,self_attn.o_proj,0.0001922144,0.05000,1.483
23,mlp.gate_proj,0.0022250249,0.05000,2.107
23,mlp.up_proj,0.0020641453,0.05000,2.127
23,mlp.down_proj,0.0004860388,0.05000,3.830
24,self_attn.v_proj,0.0008083182,0.05000,4.367
24,self_attn.k_proj,0.0005927957,0.05000,4.460
24,self_attn.q_proj,0.0027235131,0.05000,4.483
24,self_attn.o_proj,0.0002267568,0.05000,1.471
24,mlp.up_proj,0.0022716492,0.05000,2.094
24,mlp.gate_proj,0.0025005769,0.05000,2.118
24,mlp.down_proj,0.0005901432,0.05000,3.820
25,self_attn.q_proj,0.0019246214,0.05000,4.292
25,self_attn.k_proj,0.0004538385,0.05000,4.343
25,self_attn.v_proj,0.0005557584,0.05000,4.381
25,self_attn.o_proj,0.0001088033,0.05000,1.481
25,mlp.up_proj,0.0026175110,0.05000,2.103
25,mlp.gate_proj,0.0029235990,0.05000,2.102
25,mlp.down_proj,0.0007230263,0.05000,3.834
26,self_attn.q_proj,0.0030830475,0.05000,4.367
26,self_attn.v_proj,0.0008666406,0.05000,4.417
26,self_attn.k_proj,0.0006896122,0.05000,4.424
26,self_attn.o_proj,0.0001164570,0.05000,1.441
26,mlp.gate_proj,0.0035159102,0.05000,2.105
26,mlp.up_proj,0.0032400480,0.05000,2.139
26,mlp.down_proj,0.0008100310,0.05000,3.738
27,self_attn.q_proj,0.0038217060,0.05000,4.317
27,self_attn.k_proj,0.0008253228,0.05000,4.359
27,self_attn.v_proj,0.0011545808,0.05000,4.379
27,self_attn.o_proj,0.0001481975,0.05000,1.495
27,mlp.gate_proj,0.0038771487,0.05000,2.060
27,mlp.up_proj,0.0036957798,0.05000,2.074
27,mlp.down_proj,0.0010563065,0.05000,3.884
28,self_attn.v_proj,0.0012145786,0.05000,4.342
28,self_attn.q_proj,0.0040962543,0.05000,4.407
28,self_attn.k_proj,0.0009609835,0.05000,4.435
28,self_attn.o_proj,0.0002459575,0.05000,1.536
28,mlp.gate_proj,0.0043499027,0.05000,2.249
28,mlp.up_proj,0.0043100494,0.05000,2.284
28,mlp.down_proj,0.0015757555,0.05000,3.881
29,self_attn.k_proj,0.0021209687,0.05000,4.298
29,self_attn.q_proj,0.0095595865,0.05000,4.304
29,self_attn.v_proj,0.0029053794,0.05000,4.363
29,self_attn.o_proj,0.0001616285,0.05000,1.479
29,mlp.up_proj,0.0052241395,0.05000,2.076
29,mlp.gate_proj,0.0050324519,0.05000,2.098
29,mlp.down_proj,0.0018942906,0.05000,3.788
30,self_attn.v_proj,0.0036343644,0.05000,4.247
30,self_attn.k_proj,0.0027200088,0.05000,4.341
30,self_attn.q_proj,0.0110439847,0.05000,4.364
30,self_attn.o_proj,0.0004099395,0.05000,1.508
30,mlp.up_proj,0.0057803822,0.05000,2.120
30,mlp.gate_proj,0.0053990015,0.05000,2.146
30,mlp.down_proj,0.0026668868,0.05000,3.796
31,self_attn.k_proj,0.0035780576,0.05000,4.280
31,self_attn.v_proj,0.0051702445,0.05000,4.344
31,self_attn.q_proj,0.0141764066,0.05000,4.371
31,self_attn.o_proj,0.0004475123,0.05000,1.467
31,mlp.gate_proj,0.0054525739,0.05000,1.951
31,mlp.up_proj,0.0061484700,0.05000,1.985
31,mlp.down_proj,0.0033983206,0.05000,3.878
32,self_attn.k_proj,0.0049337751,0.05000,4.351
32,self_attn.q_proj,0.0206758933,0.05000,4.435
32,self_attn.v_proj,0.0077025177,0.05000,4.465
32,self_attn.o_proj,0.0005332192,0.05000,1.471
32,mlp.gate_proj,0.0057657471,0.05000,2.036
32,mlp.up_proj,0.0066501302,0.05000,2.062
32,mlp.down_proj,0.0043100822,0.05000,3.855
33,self_attn.q_proj,0.0426740829,0.05000,4.337
33,self_attn.k_proj,0.0085745315,0.05000,4.387
33,self_attn.v_proj,0.0163852944,0.05000,4.405
33,self_attn.o_proj,0.0007346065,0.05000,1.459
33,mlp.up_proj,0.0073134509,0.05000,2.152
33,mlp.gate_proj,0.0062658977,0.05000,2.176
33,mlp.down_proj,0.0056324352,0.05000,3.802
34,self_attn.v_proj,0.0134164695,0.05000,4.412
34,self_attn.q_proj,0.0347782521,0.05000,4.430
34,self_attn.k_proj,0.0076354003,0.05000,4.462
34,self_attn.o_proj,0.0018579574,0.05000,1.457
34,mlp.gate_proj,0.0080567708,0.05000,2.135
34,mlp.up_proj,0.0087948156,0.05000,2.168
34,mlp.down_proj,0.0079165159,0.05000,3.861
35,self_attn.q_proj,0.0188541359,0.05000,4.320
35,self_attn.v_proj,0.0064772646,0.05000,4.414
35,self_attn.k_proj,0.0047970992,0.05000,4.437
35,self_attn.o_proj,0.0027409442,0.05000,1.486
35,mlp.up_proj,0.0270876766,0.05000,2.115
35,mlp.gate_proj,0.0218691803,0.05000,2.126
35,mlp.down_proj,0.0207865040,0.05000,3.860
|