File size: 11,667 Bytes
0b358c2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | layer,module,loss,samples,damp,time
0,self_attn.q_proj,0.0002350870,0.05000,1.389
0,self_attn.k_proj,0.0000495884,0.05000,1.401
0,self_attn.v_proj,0.0000058943,0.05000,1.411
0,self_attn.o_proj,0.0000236358,0.05000,0.468
0,mlp.up_proj,0.0002387998,0.05000,0.884
0,mlp.gate_proj,0.0003092090,0.05000,0.905
0,mlp.down_proj,0.0000352943,0.05000,2.892
1,self_attn.q_proj,0.0000474484,0.05000,1.445
1,self_attn.k_proj,0.0000132815,0.05000,1.464
1,self_attn.v_proj,0.0000025667,0.05000,1.471
1,self_attn.o_proj,0.0000079649,0.05000,0.535
1,mlp.up_proj,0.0130337269,0.05000,1.058
1,mlp.gate_proj,0.0172095274,0.05000,1.066
1,mlp.down_proj,0.0000033589,0.05000,3.471
2,self_attn.k_proj,0.0000159378,0.05000,1.344
2,self_attn.v_proj,0.0000040993,0.05000,1.359
2,self_attn.q_proj,0.0000724677,0.05000,1.372
2,self_attn.o_proj,0.0000051665,0.05000,0.496
2,mlp.gate_proj,0.0038327214,0.05000,0.789
2,mlp.up_proj,0.0037921627,0.05000,0.796
2,mlp.down_proj,0.0090362954,0.05000,2.958
3,self_attn.v_proj,0.0000121165,0.05000,1.371
3,self_attn.k_proj,0.0000652644,0.05000,1.377
3,self_attn.q_proj,0.0003032616,0.05000,1.398
3,self_attn.o_proj,0.0000070980,0.05000,0.840
3,mlp.gate_proj,0.0050947977,0.05000,1.129
3,mlp.up_proj,0.0043798673,0.05000,1.141
3,mlp.down_proj,0.0001146523,0.05000,2.803
4,self_attn.v_proj,0.0000165725,0.05000,1.741
4,self_attn.q_proj,0.0002473793,0.05000,1.757
4,self_attn.k_proj,0.0000484598,0.05000,1.771
4,self_attn.o_proj,0.0000154044,0.05000,0.452
4,mlp.up_proj,0.0034890500,0.05000,0.945
4,mlp.gate_proj,0.0048000967,0.05000,0.968
4,mlp.down_proj,0.0000431230,0.05000,2.768
5,self_attn.v_proj,0.0000432888,0.05000,1.424
5,self_attn.q_proj,0.0005337124,0.05000,1.427
5,self_attn.k_proj,0.0000954607,0.05000,1.446
5,self_attn.o_proj,0.0000096460,0.05000,0.449
5,mlp.gate_proj,0.0064711852,0.05000,0.952
5,mlp.up_proj,0.0054382997,0.05000,0.964
5,mlp.down_proj,0.0000662426,0.05000,2.780
6,self_attn.v_proj,0.0000374054,0.05000,1.393
6,self_attn.q_proj,0.0003390273,0.05000,1.413
6,self_attn.k_proj,0.0000539993,0.05000,1.431
6,self_attn.o_proj,0.0000239811,0.05000,0.453
6,mlp.gate_proj,0.0083112013,0.05000,0.931
6,mlp.up_proj,0.0070795329,0.05000,0.940
6,mlp.down_proj,0.0000900177,0.05000,2.698
7,self_attn.k_proj,0.0000704335,0.05000,1.509
7,self_attn.q_proj,0.0004461423,0.05000,1.532
7,self_attn.v_proj,0.0000449421,0.05000,1.540
7,self_attn.o_proj,0.0000158974,0.05000,0.479
7,mlp.gate_proj,0.0084789699,0.05000,1.144
7,mlp.up_proj,0.0068957208,0.05000,1.156
7,mlp.down_proj,0.0001276712,0.05000,3.500
8,self_attn.q_proj,0.0004550622,0.05000,1.419
8,self_attn.v_proj,0.0000480385,0.05000,1.423
8,self_attn.k_proj,0.0000688795,0.05000,1.428
8,self_attn.o_proj,0.0000237095,0.05000,0.469
8,mlp.up_proj,0.0065720858,0.05000,0.953
8,mlp.gate_proj,0.0092143638,0.05000,0.957
8,mlp.down_proj,0.0001645900,0.05000,2.690
9,self_attn.q_proj,0.0004010807,0.05000,1.298
9,self_attn.k_proj,0.0000703355,0.05000,1.310
9,self_attn.v_proj,0.0000412660,0.05000,1.319
9,self_attn.o_proj,0.0000372614,0.05000,0.451
9,mlp.gate_proj,0.0086270275,0.05000,0.724
9,mlp.up_proj,0.0057881656,0.05000,0.729
9,mlp.down_proj,0.0001873366,0.05000,2.661
10,self_attn.v_proj,0.0000675373,0.05000,1.464
10,self_attn.k_proj,0.0000842096,0.05000,1.476
10,self_attn.q_proj,0.0005640181,0.05000,1.486
10,self_attn.o_proj,0.0000375405,0.05000,0.465
10,mlp.gate_proj,0.0064309352,0.05000,0.952
10,mlp.up_proj,0.0042183016,0.05000,0.952
10,mlp.down_proj,0.0002274845,0.05000,2.724
11,self_attn.k_proj,0.0000618624,0.05000,1.443
11,self_attn.v_proj,0.0000554348,0.05000,1.452
11,self_attn.q_proj,0.0004188407,0.05000,1.484
11,self_attn.o_proj,0.0000588654,0.05000,0.461
11,mlp.up_proj,0.0024528662,0.05000,0.955
11,mlp.gate_proj,0.0034099502,0.05000,0.960
11,mlp.down_proj,0.0002484049,0.05000,2.826
12,self_attn.k_proj,0.0000663459,0.05000,1.707
12,self_attn.q_proj,0.0004353355,0.05000,1.714
12,self_attn.v_proj,0.0000493771,0.05000,1.720
12,self_attn.o_proj,0.0000569917,0.05000,0.534
12,mlp.gate_proj,0.0037296684,0.05000,0.927
12,mlp.up_proj,0.0026382779,0.05000,0.932
12,mlp.down_proj,0.0002238520,0.05000,2.694
13,self_attn.k_proj,0.0000918726,0.05000,1.413
13,self_attn.v_proj,0.0000332990,0.05000,1.428
13,self_attn.q_proj,0.0005053526,0.05000,1.429
13,self_attn.o_proj,0.0000357019,0.05000,0.451
13,mlp.gate_proj,0.0020309976,0.05000,0.801
13,mlp.up_proj,0.0019490962,0.05000,0.810
13,mlp.down_proj,0.0001979468,0.05000,2.777
14,self_attn.q_proj,0.0003977691,0.05000,1.297
14,self_attn.v_proj,0.0000393797,0.05000,1.304
14,self_attn.k_proj,0.0000605854,0.05000,1.308
14,self_attn.o_proj,0.0000810008,0.05000,0.458
14,mlp.up_proj,0.0018685926,0.05000,0.907
14,mlp.gate_proj,0.0019428260,0.05000,0.913
14,mlp.down_proj,0.0001839744,0.05000,2.739
15,self_attn.k_proj,0.0000632234,0.05000,1.297
15,self_attn.v_proj,0.0000390386,0.05000,1.308
15,self_attn.q_proj,0.0004023011,0.05000,1.323
15,self_attn.o_proj,0.0000734222,0.05000,0.459
15,mlp.up_proj,0.0016407124,0.05000,0.922
15,mlp.gate_proj,0.0015691990,0.05000,0.930
15,mlp.down_proj,0.0001619132,0.05000,2.651
16,self_attn.v_proj,0.0000397472,0.05000,1.758
16,self_attn.q_proj,0.0004093029,0.05000,1.780
16,self_attn.k_proj,0.0000679986,0.05000,1.785
16,self_attn.o_proj,0.0000735932,0.05000,0.714
16,mlp.gate_proj,0.0016409640,0.05000,1.356
16,mlp.up_proj,0.0016108529,0.05000,1.364
16,mlp.down_proj,0.0001433920,0.05000,2.709
17,self_attn.q_proj,0.0006384926,0.05000,1.454
17,self_attn.v_proj,0.0000570526,0.05000,1.470
17,self_attn.k_proj,0.0001091180,0.05000,1.481
17,self_attn.o_proj,0.0000506870,0.05000,0.467
17,mlp.up_proj,0.0014302930,0.05000,0.949
17,mlp.gate_proj,0.0014094597,0.05000,0.954
17,mlp.down_proj,0.0001370377,0.05000,2.728
18,self_attn.q_proj,0.0004070264,0.05000,1.559
18,self_attn.k_proj,0.0000593264,0.05000,1.642
18,self_attn.v_proj,0.0000428977,0.05000,1.651
18,self_attn.o_proj,0.0000611151,0.05000,0.643
18,mlp.up_proj,0.0013951592,0.05000,1.321
18,mlp.gate_proj,0.0014391315,0.05000,1.320
18,mlp.down_proj,0.0001301621,0.05000,4.631
19,self_attn.v_proj,0.0000454103,0.05000,2.170
19,self_attn.k_proj,0.0000802021,0.05000,2.260
19,self_attn.q_proj,0.0004584398,0.05000,2.283
19,self_attn.o_proj,0.0000832018,0.05000,0.855
19,mlp.gate_proj,0.0013592220,0.05000,1.760
19,mlp.up_proj,0.0014207353,0.05000,1.786
19,mlp.down_proj,0.0001298065,0.05000,4.033
20,self_attn.k_proj,0.0000823069,0.05000,1.879
20,self_attn.q_proj,0.0007093750,0.05000,1.935
20,self_attn.v_proj,0.0000864942,0.05000,1.993
20,self_attn.o_proj,0.0000718808,0.05000,0.821
20,mlp.gate_proj,0.0013735232,0.05000,1.072
20,mlp.up_proj,0.0014128388,0.05000,1.215
20,mlp.down_proj,0.0001408066,0.05000,4.534
21,self_attn.k_proj,0.0000800281,0.05000,2.156
21,self_attn.q_proj,0.0005046818,0.05000,2.172
21,self_attn.v_proj,0.0000577273,0.05000,2.274
21,self_attn.o_proj,0.0000763357,0.05000,0.748
21,mlp.gate_proj,0.0014464086,0.05000,1.168
21,mlp.up_proj,0.0013651836,0.05000,1.212
21,mlp.down_proj,0.0001177456,0.05000,2.956
22,self_attn.k_proj,0.0000903085,0.05000,1.766
22,self_attn.q_proj,0.0005909076,0.05000,1.806
22,self_attn.v_proj,0.0000757894,0.05000,1.810
22,self_attn.o_proj,0.0000623799,0.05000,0.526
22,mlp.up_proj,0.0014004772,0.05000,1.257
22,mlp.gate_proj,0.0014311959,0.05000,1.275
22,mlp.down_proj,0.0001264588,0.05000,4.373
23,self_attn.q_proj,0.0006088087,0.05000,2.115
23,self_attn.k_proj,0.0000888351,0.05000,2.155
23,self_attn.v_proj,0.0000476617,0.05000,2.223
23,self_attn.o_proj,0.0000646709,0.05000,0.644
23,mlp.up_proj,0.0014787837,0.05000,1.319
23,mlp.gate_proj,0.0014157394,0.05000,1.447
23,mlp.down_proj,0.0001453594,0.05000,4.296
24,self_attn.v_proj,0.0000535015,0.05000,1.915
24,self_attn.k_proj,0.0000606524,0.05000,1.972
24,self_attn.q_proj,0.0004475276,0.05000,1.990
24,self_attn.o_proj,0.0000783631,0.05000,0.887
24,mlp.gate_proj,0.0013949360,0.05000,1.192
24,mlp.up_proj,0.0014352342,0.05000,1.383
24,mlp.down_proj,0.0001567249,0.05000,4.689
25,self_attn.k_proj,0.0000588555,0.05000,2.278
25,self_attn.v_proj,0.0000853784,0.05000,2.371
25,self_attn.q_proj,0.0006110521,0.05000,2.532
25,self_attn.o_proj,0.0000598715,0.05000,0.982
25,mlp.up_proj,0.0015592789,0.05000,1.336
25,mlp.gate_proj,0.0015332821,0.05000,1.365
25,mlp.down_proj,0.0001992039,0.05000,3.003
26,self_attn.q_proj,0.0004816819,0.05000,1.612
26,self_attn.v_proj,0.0000801456,0.05000,1.647
26,self_attn.k_proj,0.0000613980,0.05000,1.656
26,self_attn.o_proj,0.0000884467,0.05000,0.513
26,mlp.gate_proj,0.0016698752,0.05000,1.040
26,mlp.up_proj,0.0017734253,0.05000,1.054
26,mlp.down_proj,0.0002655405,0.05000,3.050
27,self_attn.v_proj,0.0001156350,0.05000,1.534
27,self_attn.k_proj,0.0000694705,0.05000,1.571
27,self_attn.q_proj,0.0008163267,0.05000,1.579
27,self_attn.o_proj,0.0001265619,0.05000,0.511
27,mlp.up_proj,0.0018934087,0.05000,0.999
27,mlp.gate_proj,0.0018705411,0.05000,1.017
27,mlp.down_proj,0.0003228687,0.05000,2.862
28,self_attn.k_proj,0.0000650905,0.05000,2.290
28,self_attn.q_proj,0.0006038837,0.05000,2.303
28,self_attn.v_proj,0.0001092675,0.05000,2.350
28,self_attn.o_proj,0.0001192229,0.05000,0.755
28,mlp.up_proj,0.0021129013,0.05000,1.005
28,mlp.gate_proj,0.0021004773,0.05000,1.026
28,mlp.down_proj,0.0003793565,0.05000,2.973
29,self_attn.q_proj,0.0005846746,0.05000,1.838
29,self_attn.k_proj,0.0000670443,0.05000,1.869
29,self_attn.v_proj,0.0000986339,0.05000,1.879
29,self_attn.o_proj,0.0000909046,0.05000,0.470
29,mlp.up_proj,0.0025685690,0.05000,1.072
29,mlp.gate_proj,0.0025046702,0.05000,1.101
29,mlp.down_proj,0.0005272932,0.05000,3.049
30,self_attn.v_proj,0.0002365673,0.05000,1.805
30,self_attn.q_proj,0.0008247248,0.05000,1.814
30,self_attn.k_proj,0.0000707403,0.05000,1.832
30,self_attn.o_proj,0.0000990036,0.05000,0.519
30,mlp.up_proj,0.0035084721,0.05000,0.990
30,mlp.gate_proj,0.0032103522,0.05000,1.040
30,mlp.down_proj,0.0053239912,0.05000,2.915
31,self_attn.q_proj,0.0009805325,0.05000,1.691
31,self_attn.v_proj,0.0002592109,0.05000,1.704
31,self_attn.k_proj,0.0000985458,0.05000,1.714
31,self_attn.o_proj,0.0002562517,0.05000,0.501
31,mlp.up_proj,0.0041799812,0.05000,1.191
31,mlp.gate_proj,0.0036576450,0.05000,1.214
31,mlp.down_proj,0.0017657173,0.05000,4.082
32,self_attn.v_proj,0.0007043787,0.05000,2.017
32,self_attn.k_proj,0.0001399740,0.05000,2.077
32,self_attn.q_proj,0.0015637881,0.05000,2.086
32,self_attn.o_proj,0.0002418083,0.05000,0.756
32,mlp.up_proj,0.0043462683,0.05000,1.288
32,mlp.gate_proj,0.0038090997,0.05000,1.308
32,mlp.down_proj,0.0018351650,0.05000,4.499
33,self_attn.v_proj,0.0015008834,0.05000,2.409
33,self_attn.k_proj,0.0001224033,0.05000,2.454
33,self_attn.q_proj,0.0014767564,0.05000,2.478
33,self_attn.o_proj,0.0002690905,0.05000,0.725
33,mlp.gate_proj,0.0035885438,0.05000,1.361
33,mlp.up_proj,0.0044905547,0.05000,1.399
33,mlp.down_proj,0.0034433876,0.05000,4.359
34,self_attn.v_proj,0.0002083482,0.05000,2.172
34,self_attn.q_proj,0.0006153963,0.05000,2.220
34,self_attn.k_proj,0.0000626373,0.05000,2.271
34,self_attn.o_proj,0.0002969657,0.05000,0.690
34,mlp.gate_proj,0.0039339582,0.05000,1.359
34,mlp.up_proj,0.0045975191,0.05000,1.368
34,mlp.down_proj,0.0025222073,0.05000,3.361
35,self_attn.v_proj,0.0001811058,0.05000,1.607
35,self_attn.k_proj,0.0000604469,0.05000,1.616
35,self_attn.q_proj,0.0005729022,0.05000,1.642
35,self_attn.o_proj,0.0002992628,0.05000,0.602
35,mlp.up_proj,0.0064264687,0.05000,1.194
35,mlp.gate_proj,0.0060764383,0.05000,1.285
35,mlp.down_proj,0.0061064307,0.05000,3.911
|