File size: 11,667 Bytes
1c4fbfb | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0000003401,0.05000,2.456
0,self_attn.k_proj,0.0000000715,0.05000,2.511
0,self_attn.v_proj,0.0000000085,0.05000,2.522
0,self_attn.o_proj,0.0000000277,0.05000,0.727
0,mlp.up_proj,0.0000003028,0.05000,1.351
0,mlp.gate_proj,0.0000003923,0.05000,1.363
0,mlp.down_proj,0.0000000544,0.05000,4.372
1,self_attn.k_proj,0.0000000173,0.05000,2.496
1,self_attn.v_proj,0.0000000033,0.05000,2.535
1,self_attn.q_proj,0.0000000622,0.05000,2.537
1,self_attn.o_proj,0.0000000095,0.05000,0.742
1,mlp.up_proj,0.0000169856,0.05000,1.392
1,mlp.gate_proj,0.0000222209,0.05000,1.398
1,mlp.down_proj,0.0000000056,0.05000,4.177
2,self_attn.k_proj,0.0000000210,0.05000,2.543
2,self_attn.q_proj,0.0000000956,0.05000,2.564
2,self_attn.v_proj,0.0000000054,0.05000,2.588
2,self_attn.o_proj,0.0000000061,0.05000,0.739
2,mlp.gate_proj,0.0000048486,0.05000,1.366
2,mlp.up_proj,0.0000049113,0.05000,1.373
2,mlp.down_proj,0.0000085108,0.05000,4.199
3,self_attn.v_proj,0.0000000161,0.05000,2.525
3,self_attn.k_proj,0.0000000878,0.05000,2.538
3,self_attn.q_proj,0.0000004073,0.05000,2.542
3,self_attn.o_proj,0.0000000094,0.05000,0.753
3,mlp.gate_proj,0.0000064317,0.05000,1.357
3,mlp.up_proj,0.0000058005,0.05000,1.365
3,mlp.down_proj,0.0000001039,0.05000,4.192
4,self_attn.q_proj,0.0000003292,0.05000,2.325
4,self_attn.v_proj,0.0000000221,0.05000,2.356
4,self_attn.k_proj,0.0000000643,0.05000,2.361
4,self_attn.o_proj,0.0000000181,0.05000,0.750
4,mlp.gate_proj,0.0000060889,0.05000,1.348
4,mlp.up_proj,0.0000046043,0.05000,1.352
4,mlp.down_proj,0.0000000563,0.05000,4.266
5,self_attn.v_proj,0.0000000582,0.05000,2.441
5,self_attn.q_proj,0.0000007062,0.05000,2.460
5,self_attn.k_proj,0.0000001253,0.05000,2.465
5,self_attn.o_proj,0.0000000114,0.05000,0.750
5,mlp.gate_proj,0.0000084792,0.05000,1.368
5,mlp.up_proj,0.0000071883,0.05000,1.372
5,mlp.down_proj,0.0000001014,0.05000,4.254
6,self_attn.q_proj,0.0000004542,0.05000,2.507
6,self_attn.v_proj,0.0000000503,0.05000,2.529
6,self_attn.k_proj,0.0000000723,0.05000,2.543
6,self_attn.o_proj,0.0000000305,0.05000,0.750
6,mlp.gate_proj,0.0000109521,0.05000,1.369
6,mlp.up_proj,0.0000094474,0.05000,1.381
6,mlp.down_proj,0.0000001409,0.05000,4.289
7,self_attn.k_proj,0.0000000955,0.05000,2.519
7,self_attn.q_proj,0.0000006046,0.05000,2.523
7,self_attn.v_proj,0.0000000611,0.05000,2.539
7,self_attn.o_proj,0.0000000201,0.05000,0.749
7,mlp.up_proj,0.0000092888,0.05000,1.355
7,mlp.gate_proj,0.0000111783,0.05000,1.360
7,mlp.down_proj,0.0000001957,0.05000,4.250
8,self_attn.v_proj,0.0000000658,0.05000,2.420
8,self_attn.k_proj,0.0000000936,0.05000,2.438
8,self_attn.q_proj,0.0000006208,0.05000,2.456
8,self_attn.o_proj,0.0000000297,0.05000,0.741
8,mlp.up_proj,0.0000088363,0.05000,1.376
8,mlp.gate_proj,0.0000123492,0.05000,1.387
8,mlp.down_proj,0.0000002615,0.05000,4.274
9,self_attn.v_proj,0.0000000561,0.05000,2.445
9,self_attn.k_proj,0.0000000949,0.05000,2.457
9,self_attn.q_proj,0.0000005419,0.05000,2.473
9,self_attn.o_proj,0.0000000487,0.05000,0.754
9,mlp.up_proj,0.0000078157,0.05000,1.364
9,mlp.gate_proj,0.0000116062,0.05000,1.371
9,mlp.down_proj,0.0000003051,0.05000,4.189
10,self_attn.q_proj,0.0000007660,0.05000,2.538
10,self_attn.v_proj,0.0000000931,0.05000,2.554
10,self_attn.k_proj,0.0000001141,0.05000,2.582
10,self_attn.o_proj,0.0000000494,0.05000,0.754
10,mlp.gate_proj,0.0000087138,0.05000,1.354
10,mlp.up_proj,0.0000057507,0.05000,1.358
10,mlp.down_proj,0.0000003635,0.05000,4.210
11,self_attn.q_proj,0.0000005695,0.05000,2.455
11,self_attn.k_proj,0.0000000838,0.05000,2.469
11,self_attn.v_proj,0.0000000758,0.05000,2.473
11,self_attn.o_proj,0.0000000737,0.05000,0.758
11,mlp.up_proj,0.0000033895,0.05000,1.429
11,mlp.gate_proj,0.0000047200,0.05000,1.434
11,mlp.down_proj,0.0000003966,0.05000,4.243
12,self_attn.v_proj,0.0000000689,0.05000,2.344
12,self_attn.k_proj,0.0000000907,0.05000,2.375
12,self_attn.q_proj,0.0000005998,0.05000,2.376
12,self_attn.o_proj,0.0000000781,0.05000,0.744
12,mlp.up_proj,0.0000036776,0.05000,1.390
12,mlp.gate_proj,0.0000052030,0.05000,1.391
12,mlp.down_proj,0.0000003800,0.05000,4.251
13,self_attn.k_proj,0.0000361445,0.05000,2.470
13,self_attn.q_proj,0.0001988869,0.05000,2.479
13,self_attn.v_proj,0.0000131335,0.05000,2.485
13,self_attn.o_proj,0.0000135128,0.05000,0.745
13,mlp.gate_proj,0.0008288599,0.05000,1.382
13,mlp.up_proj,0.0007943917,0.05000,1.387
13,mlp.down_proj,0.0000983028,0.05000,4.216
14,self_attn.v_proj,0.0000000555,0.05000,2.447
14,self_attn.k_proj,0.0000000836,0.05000,2.523
14,self_attn.q_proj,0.0000005526,0.05000,2.541
14,self_attn.o_proj,0.0000001077,0.05000,0.754
14,mlp.gate_proj,0.0000027660,0.05000,1.384
14,mlp.up_proj,0.0000026624,0.05000,1.388
14,mlp.down_proj,0.0000003076,0.05000,4.218
15,self_attn.k_proj,0.0000000867,0.05000,2.515
15,self_attn.v_proj,0.0000000538,0.05000,2.525
15,self_attn.q_proj,0.0000005570,0.05000,2.528
15,self_attn.o_proj,0.0000000963,0.05000,0.751
15,mlp.gate_proj,0.0000022434,0.05000,1.351
15,mlp.up_proj,0.0000023406,0.05000,1.354
15,mlp.down_proj,0.0000002715,0.05000,4.256
16,self_attn.v_proj,0.0000000568,0.05000,2.429
16,self_attn.k_proj,0.0000000957,0.05000,2.439
16,self_attn.q_proj,0.0000005776,0.05000,2.444
16,self_attn.o_proj,0.0000001003,0.05000,0.751
16,mlp.up_proj,0.0000022801,0.05000,1.354
16,mlp.gate_proj,0.0000023282,0.05000,1.369
16,mlp.down_proj,0.0000002357,0.05000,4.220
17,self_attn.q_proj,0.0000009054,0.05000,2.522
17,self_attn.v_proj,0.0000000854,0.05000,2.539
17,self_attn.k_proj,0.0000001532,0.05000,2.547
17,self_attn.o_proj,0.0000000666,0.05000,0.750
17,mlp.up_proj,0.0000020422,0.05000,1.365
17,mlp.gate_proj,0.0000020144,0.05000,1.378
17,mlp.down_proj,0.0000002270,0.05000,4.209
18,self_attn.q_proj,0.0000005731,0.05000,2.550
18,self_attn.v_proj,0.0000000610,0.05000,2.625
18,self_attn.k_proj,0.0000000831,0.05000,2.640
18,self_attn.o_proj,0.0000000860,0.05000,0.748
18,mlp.up_proj,0.0000019781,0.05000,1.387
18,mlp.gate_proj,0.0000020457,0.05000,1.392
18,mlp.down_proj,0.0000002135,0.05000,4.196
19,self_attn.v_proj,0.0000000631,0.05000,2.677
19,self_attn.q_proj,0.0000006335,0.05000,2.683
19,self_attn.k_proj,0.0000001105,0.05000,2.701
19,self_attn.o_proj,0.0000001114,0.05000,0.759
19,mlp.up_proj,0.0000019969,0.05000,1.361
19,mlp.gate_proj,0.0000019120,0.05000,1.369
19,mlp.down_proj,0.0000002123,0.05000,4.198
20,self_attn.q_proj,0.0000009823,0.05000,2.507
20,self_attn.k_proj,0.0000001125,0.05000,2.513
20,self_attn.v_proj,0.0000001236,0.05000,2.525
20,self_attn.o_proj,0.0000000935,0.05000,0.743
20,mlp.gate_proj,0.0000019580,0.05000,1.357
20,mlp.up_proj,0.0000019916,0.05000,1.363
20,mlp.down_proj,0.0000002291,0.05000,4.209
21,self_attn.v_proj,0.0000000829,0.05000,2.504
21,self_attn.k_proj,0.0000001137,0.05000,2.514
21,self_attn.q_proj,0.0000007184,0.05000,2.522
21,self_attn.o_proj,0.0000001047,0.05000,0.737
21,mlp.up_proj,0.0000019637,0.05000,1.380
21,mlp.gate_proj,0.0000020983,0.05000,1.383
21,mlp.down_proj,0.0000002016,0.05000,4.206
22,self_attn.v_proj,0.0000001125,0.05000,2.485
22,self_attn.q_proj,0.0000008492,0.05000,2.484
22,self_attn.k_proj,0.0000001309,0.05000,2.501
22,self_attn.o_proj,0.0000000925,0.05000,0.743
22,mlp.gate_proj,0.0000021090,0.05000,1.340
22,mlp.up_proj,0.0000020592,0.05000,1.343
22,mlp.down_proj,0.0000002234,0.05000,4.213
23,self_attn.q_proj,0.0000008715,0.05000,2.384
23,self_attn.k_proj,0.0000001256,0.05000,2.441
23,self_attn.v_proj,0.0000000670,0.05000,2.447
23,self_attn.o_proj,0.0000000901,0.05000,0.747
23,mlp.gate_proj,0.0000021141,0.05000,1.354
23,mlp.up_proj,0.0000022042,0.05000,1.364
23,mlp.down_proj,0.0000002550,0.05000,4.255
24,self_attn.q_proj,0.0001880032,0.05000,2.437
24,self_attn.k_proj,0.0000255162,0.05000,2.487
24,self_attn.v_proj,0.0000224003,0.05000,2.494
24,self_attn.o_proj,0.0000325736,0.05000,0.754
24,mlp.gate_proj,0.0006041626,0.05000,1.362
24,mlp.up_proj,0.0006203335,0.05000,1.367
24,mlp.down_proj,0.0000791926,0.05000,4.315
25,self_attn.k_proj,0.0000000866,0.05000,2.411
25,self_attn.v_proj,0.0000001260,0.05000,2.444
25,self_attn.q_proj,0.0000009027,0.05000,2.451
25,self_attn.o_proj,0.0000000853,0.05000,0.754
25,mlp.gate_proj,0.0000023311,0.05000,1.374
25,mlp.up_proj,0.0000023624,0.05000,1.379
25,mlp.down_proj,0.0000003523,0.05000,4.301
26,self_attn.k_proj,0.0000000916,0.05000,2.474
26,self_attn.v_proj,0.0000001193,0.05000,2.494
26,self_attn.q_proj,0.0000007168,0.05000,2.504
26,self_attn.o_proj,0.0000001270,0.05000,0.745
26,mlp.up_proj,0.0000027133,0.05000,1.352
26,mlp.gate_proj,0.0000025628,0.05000,1.356
26,mlp.down_proj,0.0000004770,0.05000,4.249
27,self_attn.v_proj,0.0000001746,0.05000,2.479
27,self_attn.k_proj,0.0000001062,0.05000,2.514
27,self_attn.q_proj,0.0000012390,0.05000,2.537
27,self_attn.o_proj,0.0000001945,0.05000,0.754
27,mlp.up_proj,0.0000029269,0.05000,1.367
27,mlp.gate_proj,0.0000028995,0.05000,1.372
27,mlp.down_proj,0.0000006006,0.05000,4.223
28,self_attn.v_proj,0.0000001665,0.05000,2.396
28,self_attn.k_proj,0.0000000975,0.05000,2.404
28,self_attn.q_proj,0.0000009054,0.05000,2.413
28,self_attn.o_proj,0.0000001827,0.05000,0.765
28,mlp.gate_proj,0.0000033200,0.05000,1.357
28,mlp.up_proj,0.0000033334,0.05000,1.360
28,mlp.down_proj,0.0000007344,0.05000,4.210
29,self_attn.k_proj,0.0000305678,0.05000,2.542
29,self_attn.q_proj,0.0002649824,0.05000,2.574
29,self_attn.v_proj,0.0000443468,0.05000,2.591
29,self_attn.o_proj,0.0000417074,0.05000,0.756
29,mlp.up_proj,0.0011906463,0.05000,1.378
29,mlp.gate_proj,0.0011616761,0.05000,1.384
29,mlp.down_proj,0.0002980117,0.05000,4.286
30,self_attn.q_proj,0.0000012240,0.05000,2.520
30,self_attn.v_proj,0.0000003584,0.05000,2.549
30,self_attn.k_proj,0.0000001048,0.05000,2.563
30,self_attn.o_proj,0.0000001559,0.05000,0.753
30,mlp.up_proj,0.0000055833,0.05000,1.423
30,mlp.gate_proj,0.0000051110,0.05000,1.428
30,mlp.down_proj,0.0000081476,0.05000,4.274
31,self_attn.k_proj,0.0000001408,0.05000,2.504
31,self_attn.q_proj,0.0000014188,0.05000,2.516
31,self_attn.v_proj,0.0000003782,0.05000,2.525
31,self_attn.o_proj,0.0000003638,0.05000,0.748
31,mlp.up_proj,0.0000067382,0.05000,1.376
31,mlp.gate_proj,0.0000058890,0.05000,1.390
31,mlp.down_proj,0.0000033553,0.05000,4.233
32,self_attn.q_proj,0.0000022478,0.05000,2.452
32,self_attn.k_proj,0.0000001967,0.05000,2.457
32,self_attn.v_proj,0.0000009761,0.05000,2.464
32,self_attn.o_proj,0.0000003565,0.05000,0.755
32,mlp.gate_proj,0.0000061484,0.05000,1.382
32,mlp.up_proj,0.0000070169,0.05000,1.385
32,mlp.down_proj,0.0000036669,0.05000,4.195
33,self_attn.k_proj,0.0000001779,0.05000,2.507
33,self_attn.v_proj,0.0000021782,0.05000,2.514
33,self_attn.q_proj,0.0000022511,0.05000,2.517
33,self_attn.o_proj,0.0000004571,0.05000,0.752
33,mlp.gate_proj,0.0000059207,0.05000,1.364
33,mlp.up_proj,0.0000074530,0.05000,1.369
33,mlp.down_proj,0.0000068917,0.05000,4.231
34,self_attn.v_proj,0.0000003427,0.05000,2.650
34,self_attn.q_proj,0.0000009977,0.05000,2.765
34,self_attn.k_proj,0.0000001021,0.05000,2.776
34,self_attn.o_proj,0.0000005056,0.05000,0.760
34,mlp.gate_proj,0.0000066525,0.05000,1.370
34,mlp.up_proj,0.0000077802,0.05000,1.373
34,mlp.down_proj,0.0000052944,0.05000,4.182
35,self_attn.k_proj,0.0000001002,0.05000,2.618
35,self_attn.q_proj,0.0000009525,0.05000,2.639
35,self_attn.v_proj,0.0000003007,0.05000,2.666
35,self_attn.o_proj,0.0000005326,0.05000,0.757
35,mlp.up_proj,0.0000107934,0.05000,1.366
35,mlp.gate_proj,0.0000101982,0.05000,1.372
35,mlp.down_proj,0.0000105537,0.05000,4.231
|