File size: 11,667 Bytes
eafc2e5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | layer,module,loss,samples,damp,time
0,self_attn.v_proj,0.0000000085,0.05000,2.499
0,self_attn.q_proj,0.0000003401,0.05000,2.506
0,self_attn.k_proj,0.0000000715,0.05000,2.508
0,self_attn.o_proj,0.0000000277,0.05000,0.743
0,mlp.gate_proj,0.0000003923,0.05000,1.322
0,mlp.up_proj,0.0000003028,0.05000,1.327
0,mlp.down_proj,0.0000000544,0.05000,4.242
1,self_attn.v_proj,0.0000000033,0.05000,2.483
1,self_attn.k_proj,0.0000000173,0.05000,2.489
1,self_attn.q_proj,0.0000000622,0.05000,2.510
1,self_attn.o_proj,0.0000000095,0.05000,0.751
1,mlp.gate_proj,0.0000222209,0.05000,1.360
1,mlp.up_proj,0.0000169856,0.05000,1.362
1,mlp.down_proj,0.0000000056,0.05000,4.237
2,self_attn.q_proj,0.0000000956,0.05000,2.469
2,self_attn.v_proj,0.0000000054,0.05000,2.479
2,self_attn.k_proj,0.0000000210,0.05000,2.491
2,self_attn.o_proj,0.0000000061,0.05000,0.737
2,mlp.gate_proj,0.0000048486,0.05000,1.341
2,mlp.up_proj,0.0000049113,0.05000,1.345
2,mlp.down_proj,0.0000085108,0.05000,4.254
3,self_attn.q_proj,0.0000004073,0.05000,2.481
3,self_attn.k_proj,0.0000000878,0.05000,2.510
3,self_attn.v_proj,0.0000000161,0.05000,2.516
3,self_attn.o_proj,0.0000000094,0.05000,0.749
3,mlp.up_proj,0.0000058005,0.05000,1.352
3,mlp.gate_proj,0.0000064317,0.05000,1.359
3,mlp.down_proj,0.0000001039,0.05000,4.281
4,self_attn.k_proj,0.0000187172,0.05000,2.382
4,self_attn.q_proj,0.0000959323,0.05000,2.390
4,self_attn.v_proj,0.0000064129,0.05000,2.408
4,self_attn.o_proj,0.0000053439,0.05000,0.765
4,mlp.up_proj,0.0013105773,0.05000,1.337
4,mlp.gate_proj,0.0017679583,0.05000,1.337
4,mlp.down_proj,0.0000160065,0.05000,4.241
5,self_attn.v_proj,0.0000164499,0.05000,2.488
5,self_attn.q_proj,0.0002037998,0.05000,2.516
5,self_attn.k_proj,0.0000362469,0.05000,2.522
5,self_attn.o_proj,0.0000034657,0.05000,0.749
5,mlp.gate_proj,0.0024380072,0.05000,1.371
5,mlp.up_proj,0.0020617547,0.05000,1.375
5,mlp.down_proj,0.0000293610,0.05000,4.193
6,self_attn.q_proj,0.0000004554,0.05000,2.422
6,self_attn.k_proj,0.0000000725,0.05000,2.462
6,self_attn.v_proj,0.0000000506,0.05000,2.476
6,self_attn.o_proj,0.0000000303,0.05000,0.751
6,mlp.up_proj,0.0000094722,0.05000,1.373
6,mlp.gate_proj,0.0000109800,0.05000,1.376
6,mlp.down_proj,0.0000001408,0.05000,4.287
7,self_attn.q_proj,0.0001760753,0.05000,2.410
7,self_attn.k_proj,0.0000278523,0.05000,2.422
7,self_attn.v_proj,0.0000177654,0.05000,2.430
7,self_attn.o_proj,0.0000057125,0.05000,0.748
7,mlp.up_proj,0.0026567668,0.05000,1.384
7,mlp.gate_proj,0.0032646559,0.05000,1.394
7,mlp.down_proj,0.0000566328,0.05000,4.200
8,self_attn.v_proj,0.0000000660,0.05000,2.384
8,self_attn.q_proj,0.0000006230,0.05000,2.389
8,self_attn.k_proj,0.0000000940,0.05000,2.421
8,self_attn.o_proj,0.0000000298,0.05000,0.757
8,mlp.gate_proj,0.0000123745,0.05000,1.376
8,mlp.up_proj,0.0000088540,0.05000,1.381
8,mlp.down_proj,0.0000002613,0.05000,4.221
9,self_attn.v_proj,0.0000000562,0.05000,2.470
9,self_attn.k_proj,0.0000000951,0.05000,2.477
9,self_attn.q_proj,0.0000005431,0.05000,2.501
9,self_attn.o_proj,0.0000000489,0.05000,0.751
9,mlp.gate_proj,0.0000116203,0.05000,1.342
9,mlp.up_proj,0.0000078252,0.05000,1.344
9,mlp.down_proj,0.0000003043,0.05000,4.181
10,self_attn.v_proj,0.0000000932,0.05000,2.555
10,self_attn.k_proj,0.0000001140,0.05000,2.586
10,self_attn.q_proj,0.0000007667,0.05000,2.590
10,self_attn.o_proj,0.0000000496,0.05000,0.770
10,mlp.gate_proj,0.0000087202,0.05000,1.349
10,mlp.up_proj,0.0000057541,0.05000,1.354
10,mlp.down_proj,0.0000003632,0.05000,4.242
11,self_attn.v_proj,0.0000000759,0.05000,2.630
11,self_attn.q_proj,0.0000005705,0.05000,2.654
11,self_attn.k_proj,0.0000000839,0.05000,2.669
11,self_attn.o_proj,0.0000000737,0.05000,0.750
11,mlp.gate_proj,0.0000047226,0.05000,1.358
11,mlp.up_proj,0.0000033912,0.05000,1.362
11,mlp.down_proj,0.0000003967,0.05000,4.218
12,self_attn.v_proj,0.0000000690,0.05000,2.519
12,self_attn.k_proj,0.0000000907,0.05000,2.537
12,self_attn.q_proj,0.0000006007,0.05000,2.552
12,self_attn.o_proj,0.0000000782,0.05000,0.745
12,mlp.gate_proj,0.0000052067,0.05000,1.358
12,mlp.up_proj,0.0000036801,0.05000,1.363
12,mlp.down_proj,0.0000003800,0.05000,4.231
13,self_attn.k_proj,0.0000361680,0.05000,2.591
13,self_attn.q_proj,0.0001995003,0.05000,2.634
13,self_attn.v_proj,0.0000131416,0.05000,2.640
13,self_attn.o_proj,0.0000135469,0.05000,0.751
13,mlp.up_proj,0.0007942385,0.05000,1.379
13,mlp.gate_proj,0.0008285486,0.05000,1.388
13,mlp.down_proj,0.0000981101,0.05000,4.206
14,self_attn.k_proj,0.0000000836,0.05000,2.554
14,self_attn.q_proj,0.0000005530,0.05000,2.579
14,self_attn.v_proj,0.0000000556,0.05000,2.585
14,self_attn.o_proj,0.0000001078,0.05000,0.753
14,mlp.up_proj,0.0000026623,0.05000,1.337
14,mlp.gate_proj,0.0000027652,0.05000,1.350
14,mlp.down_proj,0.0000003071,0.05000,4.240
15,self_attn.v_proj,0.0000000539,0.05000,2.473
15,self_attn.q_proj,0.0000005573,0.05000,2.478
15,self_attn.k_proj,0.0000000866,0.05000,2.480
15,self_attn.o_proj,0.0000000966,0.05000,0.766
15,mlp.gate_proj,0.0000022427,0.05000,1.350
15,mlp.up_proj,0.0000023399,0.05000,1.355
15,mlp.down_proj,0.0000002715,0.05000,4.260
16,self_attn.q_proj,0.0001676437,0.05000,2.371
16,self_attn.v_proj,0.0000162746,0.05000,2.390
16,self_attn.k_proj,0.0000276929,0.05000,2.397
16,self_attn.o_proj,0.0000294759,0.05000,0.758
16,mlp.gate_proj,0.0006744311,0.05000,1.361
16,mlp.up_proj,0.0006608038,0.05000,1.371
16,mlp.down_proj,0.0000690996,0.05000,4.304
17,self_attn.k_proj,0.0000001542,0.05000,2.555
17,self_attn.q_proj,0.0000009098,0.05000,2.564
17,self_attn.v_proj,0.0000000860,0.05000,2.593
17,self_attn.o_proj,0.0000000673,0.05000,0.742
17,mlp.up_proj,0.0000020564,0.05000,1.364
17,mlp.gate_proj,0.0000020284,0.05000,1.365
17,mlp.down_proj,0.0000002287,0.05000,4.246
18,self_attn.v_proj,0.0000000613,0.05000,2.463
18,self_attn.q_proj,0.0000005767,0.05000,2.462
18,self_attn.k_proj,0.0000000836,0.05000,2.475
18,self_attn.o_proj,0.0000000865,0.05000,0.766
18,mlp.up_proj,0.0000019892,0.05000,1.361
18,mlp.gate_proj,0.0000020570,0.05000,1.369
18,mlp.down_proj,0.0000002146,0.05000,4.306
19,self_attn.q_proj,0.0000006371,0.05000,2.512
19,self_attn.v_proj,0.0000000634,0.05000,2.527
19,self_attn.k_proj,0.0000001108,0.05000,2.533
19,self_attn.o_proj,0.0000001121,0.05000,0.746
19,mlp.gate_proj,0.0000019210,0.05000,1.364
19,mlp.up_proj,0.0000020055,0.05000,1.369
19,mlp.down_proj,0.0000002133,0.05000,4.201
20,self_attn.k_proj,0.0000001130,0.05000,2.380
20,self_attn.q_proj,0.0000009866,0.05000,2.417
20,self_attn.v_proj,0.0000001241,0.05000,2.422
20,self_attn.o_proj,0.0000000941,0.05000,0.751
20,mlp.gate_proj,0.0000019665,0.05000,1.350
20,mlp.up_proj,0.0000019998,0.05000,1.355
20,mlp.down_proj,0.0000002300,0.05000,4.280
21,self_attn.v_proj,0.0000000834,0.05000,2.548
21,self_attn.k_proj,0.0000001145,0.05000,2.595
21,self_attn.q_proj,0.0000007214,0.05000,2.606
21,self_attn.o_proj,0.0000001060,0.05000,0.770
21,mlp.up_proj,0.0000019678,0.05000,1.388
21,mlp.gate_proj,0.0000021021,0.05000,1.392
21,mlp.down_proj,0.0000002017,0.05000,4.230
22,self_attn.v_proj,0.0000320336,0.05000,2.473
22,self_attn.q_proj,0.0002469760,0.05000,2.488
22,self_attn.k_proj,0.0000380821,0.05000,2.497
22,self_attn.o_proj,0.0000279594,0.05000,0.758
22,mlp.gate_proj,0.0006105048,0.05000,1.384
22,mlp.up_proj,0.0005966521,0.05000,1.390
22,mlp.down_proj,0.0000650289,0.05000,4.249
23,self_attn.v_proj,0.0000000674,0.05000,2.445
23,self_attn.k_proj,0.0000001263,0.05000,2.491
23,self_attn.q_proj,0.0000008772,0.05000,2.498
23,self_attn.o_proj,0.0000000902,0.05000,0.752
23,mlp.up_proj,0.0000022178,0.05000,1.404
23,mlp.gate_proj,0.0000021279,0.05000,1.410
23,mlp.down_proj,0.0000002565,0.05000,4.249
24,self_attn.v_proj,0.0000224701,0.05000,2.336
24,self_attn.k_proj,0.0000256526,0.05000,2.340
24,self_attn.q_proj,0.0001890992,0.05000,2.348
24,self_attn.o_proj,0.0000328178,0.05000,0.760
24,mlp.gate_proj,0.0006073164,0.05000,1.381
24,mlp.up_proj,0.0006237665,0.05000,1.390
24,mlp.down_proj,0.0000798095,0.05000,4.219
25,self_attn.v_proj,0.0000001261,0.05000,2.476
25,self_attn.k_proj,0.0000000869,0.05000,2.492
25,self_attn.q_proj,0.0000009063,0.05000,2.513
25,self_attn.o_proj,0.0000000863,0.05000,0.786
25,mlp.up_proj,0.0000023707,0.05000,1.362
25,mlp.gate_proj,0.0000023394,0.05000,1.371
25,mlp.down_proj,0.0000003536,0.05000,4.257
26,self_attn.k_proj,0.0000267902,0.05000,2.723
26,self_attn.v_proj,0.0000346260,0.05000,2.743
26,self_attn.q_proj,0.0002087079,0.05000,2.770
26,self_attn.o_proj,0.0000374781,0.05000,0.758
26,mlp.gate_proj,0.0007423891,0.05000,1.343
26,mlp.up_proj,0.0007881456,0.05000,1.347
26,mlp.down_proj,0.0001396396,0.05000,4.212
27,self_attn.k_proj,0.0000001067,0.05000,2.623
27,self_attn.v_proj,0.0000001766,0.05000,2.673
27,self_attn.q_proj,0.0000012480,0.05000,2.698
27,self_attn.o_proj,0.0000001959,0.05000,0.756
27,mlp.up_proj,0.0000029441,0.05000,1.377
27,mlp.gate_proj,0.0000029157,0.05000,1.382
27,mlp.down_proj,0.0000006032,0.05000,4.258
28,self_attn.v_proj,0.0000473291,0.05000,2.467
28,self_attn.k_proj,0.0000284646,0.05000,2.475
28,self_attn.q_proj,0.0002644169,0.05000,2.497
28,self_attn.o_proj,0.0000534566,0.05000,0.749
28,mlp.gate_proj,0.0009634670,0.05000,1.345
28,mlp.up_proj,0.0009686252,0.05000,1.350
28,mlp.down_proj,0.0002141817,0.05000,4.200
29,self_attn.v_proj,0.0000444986,0.05000,2.577
29,self_attn.k_proj,0.0000307635,0.05000,2.588
29,self_attn.q_proj,0.0002663389,0.05000,2.599
29,self_attn.o_proj,0.0000424056,0.05000,0.752
29,mlp.gate_proj,0.0011668274,0.05000,1.362
29,mlp.up_proj,0.0011953835,0.05000,1.365
29,mlp.down_proj,0.0002982608,0.05000,4.231
30,self_attn.q_proj,0.0000012274,0.05000,2.437
30,self_attn.k_proj,0.0000001052,0.05000,2.452
30,self_attn.v_proj,0.0000003586,0.05000,2.453
30,self_attn.o_proj,0.0000001560,0.05000,0.756
30,mlp.gate_proj,0.0000051271,0.05000,1.339
30,mlp.up_proj,0.0000056004,0.05000,1.346
30,mlp.down_proj,0.0000080519,0.05000,4.242
31,self_attn.v_proj,0.0000003798,0.05000,2.458
31,self_attn.k_proj,0.0000001412,0.05000,2.466
31,self_attn.q_proj,0.0000014251,0.05000,2.478
31,self_attn.o_proj,0.0000003600,0.05000,0.751
31,mlp.up_proj,0.0000067604,0.05000,1.359
31,mlp.gate_proj,0.0000059077,0.05000,1.364
31,mlp.down_proj,0.0000033549,0.05000,4.242
32,self_attn.v_proj,0.0000009778,0.05000,2.392
32,self_attn.q_proj,0.0000022483,0.05000,2.408
32,self_attn.k_proj,0.0000001966,0.05000,2.422
32,self_attn.o_proj,0.0000003576,0.05000,0.755
32,mlp.gate_proj,0.0000061421,0.05000,1.355
32,mlp.up_proj,0.0000070083,0.05000,1.360
32,mlp.down_proj,0.0000036502,0.05000,4.249
33,self_attn.k_proj,0.0000001775,0.05000,2.490
33,self_attn.v_proj,0.0000021774,0.05000,2.549
33,self_attn.q_proj,0.0000022492,0.05000,2.551
33,self_attn.o_proj,0.0000004517,0.05000,0.749
33,mlp.up_proj,0.0000074347,0.05000,1.371
33,mlp.gate_proj,0.0000059079,0.05000,1.378
33,mlp.down_proj,0.0000068711,0.05000,4.238
34,self_attn.v_proj,0.0000003419,0.05000,2.529
34,self_attn.k_proj,0.0000001017,0.05000,2.561
34,self_attn.q_proj,0.0000009959,0.05000,2.571
34,self_attn.o_proj,0.0000005017,0.05000,0.751
34,mlp.up_proj,0.0000077549,0.05000,1.376
34,mlp.gate_proj,0.0000066303,0.05000,1.379
34,mlp.down_proj,0.0000052670,0.05000,4.235
35,self_attn.k_proj,0.0000001000,0.05000,2.504
35,self_attn.q_proj,0.0000009493,0.05000,2.536
35,self_attn.v_proj,0.0000002995,0.05000,2.552
35,self_attn.o_proj,0.0000005288,0.05000,0.770
35,mlp.gate_proj,0.0000101551,0.05000,1.356
35,mlp.up_proj,0.0000107530,0.05000,1.361
35,mlp.down_proj,0.0000105331,0.05000,4.218
|