File size: 11,667 Bytes
d4d173b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0000003401,0.05000,2.345
0,self_attn.k_proj,0.0000000715,0.05000,2.364
0,self_attn.v_proj,0.0000000085,0.05000,2.368
0,self_attn.o_proj,0.0000000277,0.05000,0.771
0,mlp.gate_proj,0.0000003923,0.05000,1.354
0,mlp.up_proj,0.0000003028,0.05000,1.358
0,mlp.down_proj,0.0000000544,0.05000,4.481
1,self_attn.k_proj,0.0000000173,0.05000,2.555
1,self_attn.q_proj,0.0000000622,0.05000,2.587
1,self_attn.v_proj,0.0000000033,0.05000,2.622
1,self_attn.o_proj,0.0000000095,0.05000,0.750
1,mlp.gate_proj,0.0000222209,0.05000,1.368
1,mlp.up_proj,0.0000169856,0.05000,1.377
1,mlp.down_proj,0.0000000056,0.05000,4.306
2,self_attn.q_proj,0.0000000956,0.05000,2.530
2,self_attn.k_proj,0.0000000210,0.05000,2.544
2,self_attn.v_proj,0.0000000054,0.05000,2.568
2,self_attn.o_proj,0.0000000061,0.05000,0.756
2,mlp.gate_proj,0.0000048486,0.05000,1.414
2,mlp.up_proj,0.0000049113,0.05000,1.432
2,mlp.down_proj,0.0000085108,0.05000,4.318
3,self_attn.q_proj,0.0000004073,0.05000,2.616
3,self_attn.k_proj,0.0000000878,0.05000,2.670
3,self_attn.v_proj,0.0000000161,0.05000,2.678
3,self_attn.o_proj,0.0000000094,0.05000,0.782
3,mlp.up_proj,0.0000058005,0.05000,1.375
3,mlp.gate_proj,0.0000064317,0.05000,1.390
3,mlp.down_proj,0.0000001039,0.05000,4.202
4,self_attn.q_proj,0.0000003292,0.05000,2.495
4,self_attn.v_proj,0.0000000221,0.05000,2.539
4,self_attn.k_proj,0.0000000643,0.05000,2.549
4,self_attn.o_proj,0.0000000181,0.05000,0.765
4,mlp.gate_proj,0.0000060889,0.05000,1.356
4,mlp.up_proj,0.0000046043,0.05000,1.358
4,mlp.down_proj,0.0000000563,0.05000,4.267
5,self_attn.k_proj,0.0000001253,0.05000,2.652
5,self_attn.v_proj,0.0000000582,0.05000,2.677
5,self_attn.q_proj,0.0000007062,0.05000,2.693
5,self_attn.o_proj,0.0000000114,0.05000,0.745
5,mlp.gate_proj,0.0000084792,0.05000,1.395
5,mlp.up_proj,0.0000071883,0.05000,1.400
5,mlp.down_proj,0.0000001014,0.05000,4.247
6,self_attn.k_proj,0.0000000723,0.05000,2.712
6,self_attn.q_proj,0.0000004542,0.05000,2.727
6,self_attn.v_proj,0.0000000503,0.05000,2.746
6,self_attn.o_proj,0.0000000305,0.05000,0.753
6,mlp.gate_proj,0.0000109521,0.05000,1.381
6,mlp.up_proj,0.0000094474,0.05000,1.390
6,mlp.down_proj,0.0000001409,0.05000,4.240
7,self_attn.q_proj,0.0000006046,0.05000,2.634
7,self_attn.k_proj,0.0000000955,0.05000,2.698
7,self_attn.v_proj,0.0000000611,0.05000,2.720
7,self_attn.o_proj,0.0000000201,0.05000,0.758
7,mlp.gate_proj,0.0000111783,0.05000,1.347
7,mlp.up_proj,0.0000092888,0.05000,1.354
7,mlp.down_proj,0.0000001957,0.05000,4.267
8,self_attn.q_proj,0.0000006208,0.05000,2.460
8,self_attn.k_proj,0.0000000936,0.05000,2.473
8,self_attn.v_proj,0.0000000658,0.05000,2.481
8,self_attn.o_proj,0.0000000297,0.05000,0.755
8,mlp.up_proj,0.0000088363,0.05000,1.354
8,mlp.gate_proj,0.0000123492,0.05000,1.358
8,mlp.down_proj,0.0000002615,0.05000,4.249
9,self_attn.q_proj,0.0000005419,0.05000,2.480
9,self_attn.k_proj,0.0000000949,0.05000,2.487
9,self_attn.v_proj,0.0000000561,0.05000,2.517
9,self_attn.o_proj,0.0000000487,0.05000,0.757
9,mlp.gate_proj,0.0000116062,0.05000,1.368
9,mlp.up_proj,0.0000078157,0.05000,1.375
9,mlp.down_proj,0.0000003051,0.05000,4.267
10,self_attn.q_proj,0.0000007660,0.05000,2.545
10,self_attn.v_proj,0.0000000931,0.05000,2.556
10,self_attn.k_proj,0.0000001141,0.05000,2.560
10,self_attn.o_proj,0.0000000494,0.05000,0.749
10,mlp.gate_proj,0.0000087138,0.05000,1.360
10,mlp.up_proj,0.0000057507,0.05000,1.366
10,mlp.down_proj,0.0000003635,0.05000,4.293
11,self_attn.q_proj,0.0000005695,0.05000,2.428
11,self_attn.v_proj,0.0000000758,0.05000,2.446
11,self_attn.k_proj,0.0000000838,0.05000,2.454
11,self_attn.o_proj,0.0000000737,0.05000,0.767
11,mlp.gate_proj,0.0000047200,0.05000,1.349
11,mlp.up_proj,0.0000033895,0.05000,1.357
11,mlp.down_proj,0.0000003966,0.05000,4.351
12,self_attn.k_proj,0.0000000907,0.05000,2.451
12,self_attn.v_proj,0.0000000689,0.05000,2.479
12,self_attn.q_proj,0.0000005998,0.05000,2.492
12,self_attn.o_proj,0.0000000781,0.05000,0.750
12,mlp.gate_proj,0.0000052030,0.05000,1.376
12,mlp.up_proj,0.0000036776,0.05000,1.383
12,mlp.down_proj,0.0000003800,0.05000,4.308
13,self_attn.k_proj,0.0000361445,0.05000,2.567
13,self_attn.q_proj,0.0001988869,0.05000,2.576
13,self_attn.v_proj,0.0000131335,0.05000,2.590
13,self_attn.o_proj,0.0000135128,0.05000,0.761
13,mlp.up_proj,0.0007943917,0.05000,1.410
13,mlp.gate_proj,0.0008288599,0.05000,1.423
13,mlp.down_proj,0.0000983028,0.05000,4.245
14,self_attn.k_proj,0.0000000836,0.05000,2.513
14,self_attn.v_proj,0.0000000555,0.05000,2.535
14,self_attn.q_proj,0.0000005526,0.05000,2.542
14,self_attn.o_proj,0.0000001077,0.05000,0.758
14,mlp.gate_proj,0.0000027660,0.05000,1.381
14,mlp.up_proj,0.0000026624,0.05000,1.387
14,mlp.down_proj,0.0000003076,0.05000,4.313
15,self_attn.v_proj,0.0000000538,0.05000,2.525
15,self_attn.k_proj,0.0000000867,0.05000,2.572
15,self_attn.q_proj,0.0000005570,0.05000,2.578
15,self_attn.o_proj,0.0000000963,0.05000,0.750
15,mlp.up_proj,0.0000023406,0.05000,1.377
15,mlp.gate_proj,0.0000022434,0.05000,1.380
15,mlp.down_proj,0.0000002715,0.05000,4.259
16,self_attn.q_proj,0.0001674280,0.05000,2.427
16,self_attn.v_proj,0.0000162792,0.05000,2.520
16,self_attn.k_proj,0.0000277001,0.05000,2.537
16,self_attn.o_proj,0.0000293571,0.05000,0.758
16,mlp.up_proj,0.0006605779,0.05000,1.385
16,mlp.gate_proj,0.0006744062,0.05000,1.399
16,mlp.down_proj,0.0000691259,0.05000,4.257
17,self_attn.k_proj,0.0000001538,0.05000,2.463
17,self_attn.q_proj,0.0000009075,0.05000,2.471
17,self_attn.v_proj,0.0000000858,0.05000,2.469
17,self_attn.o_proj,0.0000000672,0.05000,0.773
17,mlp.up_proj,0.0000020540,0.05000,1.401
17,mlp.gate_proj,0.0000020261,0.05000,1.408
17,mlp.down_proj,0.0000002283,0.05000,4.257
18,self_attn.v_proj,0.0000000613,0.05000,2.509
18,self_attn.k_proj,0.0000000836,0.05000,2.535
18,self_attn.q_proj,0.0000005755,0.05000,2.543
18,self_attn.o_proj,0.0000000861,0.05000,0.763
18,mlp.gate_proj,0.0000020542,0.05000,1.371
18,mlp.up_proj,0.0000019867,0.05000,1.384
18,mlp.down_proj,0.0000002142,0.05000,4.281
19,self_attn.v_proj,0.0000000633,0.05000,2.534
19,self_attn.k_proj,0.0000001105,0.05000,2.549
19,self_attn.q_proj,0.0000006358,0.05000,2.559
19,self_attn.o_proj,0.0000001115,0.05000,0.778
19,mlp.gate_proj,0.0000019188,0.05000,1.345
19,mlp.up_proj,0.0000020043,0.05000,1.350
19,mlp.down_proj,0.0000002130,0.05000,4.248
20,self_attn.v_proj,0.0000001238,0.05000,2.480
20,self_attn.q_proj,0.0000009841,0.05000,2.509
20,self_attn.k_proj,0.0000001128,0.05000,2.515
20,self_attn.o_proj,0.0000000938,0.05000,0.753
20,mlp.gate_proj,0.0000019638,0.05000,1.350
20,mlp.up_proj,0.0000019977,0.05000,1.360
20,mlp.down_proj,0.0000002298,0.05000,4.285
21,self_attn.v_proj,0.0000000831,0.05000,2.684
21,self_attn.k_proj,0.0000001138,0.05000,2.703
21,self_attn.q_proj,0.0000007205,0.05000,2.715
21,self_attn.o_proj,0.0000001049,0.05000,0.765
21,mlp.up_proj,0.0000019671,0.05000,1.369
21,mlp.gate_proj,0.0000021018,0.05000,1.374
21,mlp.down_proj,0.0000002018,0.05000,4.270
22,self_attn.v_proj,0.0000001127,0.05000,2.530
22,self_attn.q_proj,0.0000008507,0.05000,2.575
22,self_attn.k_proj,0.0000001312,0.05000,2.581
22,self_attn.o_proj,0.0000000927,0.05000,0.761
22,mlp.gate_proj,0.0000021110,0.05000,1.372
22,mlp.up_proj,0.0000020616,0.05000,1.376
22,mlp.down_proj,0.0000002235,0.05000,4.254
23,self_attn.v_proj,0.0000000670,0.05000,2.514
23,self_attn.k_proj,0.0000001256,0.05000,2.541
23,self_attn.q_proj,0.0000008720,0.05000,2.550
23,self_attn.o_proj,0.0000000902,0.05000,0.759
23,mlp.gate_proj,0.0000021169,0.05000,1.350
23,mlp.up_proj,0.0000022067,0.05000,1.356
23,mlp.down_proj,0.0000002553,0.05000,4.296
24,self_attn.k_proj,0.0000255149,0.05000,2.405
24,self_attn.v_proj,0.0000224293,0.05000,2.434
24,self_attn.q_proj,0.0001882081,0.05000,2.442
24,self_attn.o_proj,0.0000325786,0.05000,0.769
24,mlp.up_proj,0.0006212120,0.05000,1.378
24,mlp.gate_proj,0.0006047986,0.05000,1.390
24,mlp.down_proj,0.0000792528,0.05000,4.284
25,self_attn.v_proj,0.0000001261,0.05000,2.402
25,self_attn.k_proj,0.0000000865,0.05000,2.413
25,self_attn.q_proj,0.0000009028,0.05000,2.425
25,self_attn.o_proj,0.0000000856,0.05000,0.752
25,mlp.gate_proj,0.0000023320,0.05000,1.363
25,mlp.up_proj,0.0000023642,0.05000,1.368
25,mlp.down_proj,0.0000003521,0.05000,4.316
26,self_attn.v_proj,0.0000345133,0.05000,2.484
26,self_attn.q_proj,0.0002079831,0.05000,2.532
26,self_attn.k_proj,0.0000266343,0.05000,2.539
26,self_attn.o_proj,0.0000372935,0.05000,0.759
26,mlp.up_proj,0.0007854277,0.05000,1.365
26,mlp.gate_proj,0.0007399612,0.05000,1.366
26,mlp.down_proj,0.0001390844,0.05000,4.290
27,self_attn.q_proj,0.0000012426,0.05000,2.442
27,self_attn.k_proj,0.0000001063,0.05000,2.455
27,self_attn.v_proj,0.0000001755,0.05000,2.464
27,self_attn.o_proj,0.0000001954,0.05000,0.743
27,mlp.gate_proj,0.0000029087,0.05000,1.346
27,mlp.up_proj,0.0000029363,0.05000,1.359
27,mlp.down_proj,0.0000006022,0.05000,4.340
28,self_attn.k_proj,0.0000283726,0.05000,2.394
28,self_attn.v_proj,0.0000471470,0.05000,2.405
28,self_attn.q_proj,0.0002638888,0.05000,2.417
28,self_attn.o_proj,0.0000527611,0.05000,0.737
28,mlp.gate_proj,0.0009632122,0.05000,1.353
28,mlp.up_proj,0.0009681229,0.05000,1.360
28,mlp.down_proj,0.0002143012,0.05000,4.288
29,self_attn.v_proj,0.0000444782,0.05000,2.417
29,self_attn.k_proj,0.0000306777,0.05000,2.451
29,self_attn.q_proj,0.0002660261,0.05000,2.460
29,self_attn.o_proj,0.0000419030,0.05000,0.753
29,mlp.gate_proj,0.0011661545,0.05000,1.349
29,mlp.up_proj,0.0011946059,0.05000,1.355
29,mlp.down_proj,0.0002983677,0.05000,4.321
30,self_attn.v_proj,0.0000003594,0.05000,2.520
30,self_attn.k_proj,0.0000001052,0.05000,2.556
30,self_attn.q_proj,0.0000012281,0.05000,2.563
30,self_attn.o_proj,0.0000001564,0.05000,0.763
30,mlp.gate_proj,0.0000051266,0.05000,1.384
30,mlp.up_proj,0.0000055988,0.05000,1.389
30,mlp.down_proj,0.0000080552,0.05000,4.288
31,self_attn.q_proj,0.0000014235,0.05000,2.486
31,self_attn.k_proj,0.0000001413,0.05000,2.555
31,self_attn.v_proj,0.0000003790,0.05000,2.578
31,self_attn.o_proj,0.0000003545,0.05000,0.753
31,mlp.gate_proj,0.0000059219,0.05000,1.385
31,mlp.up_proj,0.0000067765,0.05000,1.389
31,mlp.down_proj,0.0000033691,0.05000,4.295
32,self_attn.q_proj,0.0000022495,0.05000,2.460
32,self_attn.k_proj,0.0000001965,0.05000,2.467
32,self_attn.v_proj,0.0000009772,0.05000,2.481
32,self_attn.o_proj,0.0000003497,0.05000,0.767
32,mlp.gate_proj,0.0000061569,0.05000,1.398
32,mlp.up_proj,0.0000070279,0.05000,1.404
32,mlp.down_proj,0.0000036677,0.05000,4.273
33,self_attn.k_proj,0.0000001776,0.05000,2.578
33,self_attn.v_proj,0.0000021733,0.05000,2.592
33,self_attn.q_proj,0.0000022470,0.05000,2.597
33,self_attn.o_proj,0.0000004441,0.05000,0.758
33,mlp.gate_proj,0.0000059196,0.05000,1.358
33,mlp.up_proj,0.0000074468,0.05000,1.363
33,mlp.down_proj,0.0000068785,0.05000,4.258
34,self_attn.k_proj,0.0000001017,0.05000,2.520
34,self_attn.q_proj,0.0000009964,0.05000,2.541
34,self_attn.v_proj,0.0000003421,0.05000,2.558
34,self_attn.o_proj,0.0000004992,0.05000,0.785
34,mlp.up_proj,0.0000077734,0.05000,1.347
34,mlp.gate_proj,0.0000066457,0.05000,1.352
34,mlp.down_proj,0.0000052847,0.05000,4.240
35,self_attn.k_proj,0.0000001003,0.05000,2.693
35,self_attn.v_proj,0.0000003007,0.05000,2.721
35,self_attn.q_proj,0.0000009509,0.05000,2.724
35,self_attn.o_proj,0.0000005282,0.05000,0.765
35,mlp.gate_proj,0.0000101811,0.05000,1.361
35,mlp.up_proj,0.0000107798,0.05000,1.373
35,mlp.down_proj,0.0000105316,0.05000,4.297
|