File size: 11,667 Bytes
316b026 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 | layer,module,loss,samples,damp,time
0,self_attn.v_proj,0.0000000281,0.05000,4.670
0,self_attn.k_proj,0.0000000335,0.05000,4.718
0,self_attn.q_proj,0.0000001225,0.05000,4.760
0,self_attn.o_proj,0.0000000638,0.05000,1.479
0,mlp.gate_proj,0.0000083850,0.05000,1.913
0,mlp.up_proj,0.0000072816,0.05000,1.916
0,mlp.down_proj,0.0000004270,0.05000,3.658
1,self_attn.v_proj,0.0000000562,0.05000,4.730
1,self_attn.q_proj,0.0000002102,0.05000,4.830
1,self_attn.k_proj,0.0000000580,0.05000,4.839
1,self_attn.o_proj,0.0000000563,0.05000,1.447
1,mlp.gate_proj,0.0002669175,0.05000,2.458
1,mlp.up_proj,0.0001192833,0.05000,2.516
1,mlp.down_proj,0.0000007279,0.05000,3.921
2,self_attn.v_proj,0.0000001416,0.05000,4.924
2,self_attn.q_proj,0.0000005442,0.05000,4.999
2,self_attn.k_proj,0.0000001599,0.05000,5.030
2,self_attn.o_proj,0.0000000774,0.05000,1.461
2,mlp.gate_proj,0.0004706195,0.05000,2.073
2,mlp.up_proj,0.0003679584,0.05000,2.095
2,mlp.down_proj,0.0000005296,0.05000,3.701
3,self_attn.v_proj,0.0000002583,0.05000,4.187
3,self_attn.k_proj,0.0000002787,0.05000,4.232
3,self_attn.q_proj,0.0000010549,0.05000,4.253
3,self_attn.o_proj,0.0000001789,0.05000,1.442
3,mlp.up_proj,0.0001859692,0.05000,1.984
3,mlp.gate_proj,0.0003880208,0.05000,2.003
3,mlp.down_proj,0.0000008290,0.05000,3.716
4,self_attn.v_proj,0.0000004453,0.05000,4.170
4,self_attn.q_proj,0.0000017503,0.05000,4.240
4,self_attn.k_proj,0.0000004929,0.05000,4.270
4,self_attn.o_proj,0.0000001841,0.05000,1.432
4,mlp.gate_proj,0.0003188560,0.05000,1.952
4,mlp.up_proj,0.0001058938,0.05000,1.977
4,mlp.down_proj,0.0000009877,0.05000,3.709
5,self_attn.v_proj,0.0000004747,0.05000,4.207
5,self_attn.k_proj,0.0000004808,0.05000,4.254
5,self_attn.q_proj,0.0000018170,0.05000,4.288
5,self_attn.o_proj,0.0000003358,0.05000,1.449
5,mlp.gate_proj,0.0001046843,0.05000,2.039
5,mlp.up_proj,0.0000414282,0.05000,2.070
5,mlp.down_proj,0.0000013782,0.05000,3.819
6,self_attn.v_proj,0.0000010176,0.05000,4.233
6,self_attn.q_proj,0.0000040303,0.05000,4.253
6,self_attn.k_proj,0.0000009895,0.05000,4.313
6,self_attn.o_proj,0.0000006622,0.05000,1.448
6,mlp.up_proj,0.0000503757,0.05000,2.014
6,mlp.gate_proj,0.0001110306,0.05000,2.035
6,mlp.down_proj,0.0000206670,0.05000,3.659
7,self_attn.k_proj,0.0000024329,0.05000,4.064
7,self_attn.q_proj,0.0000085931,0.05000,4.123
7,self_attn.v_proj,0.0000019982,0.05000,4.144
7,self_attn.o_proj,0.0000009256,0.05000,1.444
7,mlp.gate_proj,0.0001217811,0.05000,2.071
7,mlp.up_proj,0.0000577930,0.05000,2.092
7,mlp.down_proj,0.0000025102,0.05000,3.710
8,self_attn.v_proj,0.0000027738,0.05000,4.201
8,self_attn.q_proj,0.0000114360,0.05000,4.238
8,self_attn.k_proj,0.0000030762,0.05000,4.259
8,self_attn.o_proj,0.0000009674,0.05000,1.412
8,mlp.gate_proj,0.0000708923,0.05000,2.010
8,mlp.up_proj,0.0000563776,0.05000,2.033
8,mlp.down_proj,0.0000032682,0.05000,3.754
9,self_attn.v_proj,0.0000035083,0.05000,4.188
9,self_attn.k_proj,0.0000047883,0.05000,4.223
9,self_attn.q_proj,0.0000157103,0.05000,4.262
9,self_attn.o_proj,0.0000014919,0.05000,1.426
9,mlp.up_proj,0.0000711674,0.05000,2.011
9,mlp.gate_proj,0.0001187128,0.05000,2.031
9,mlp.down_proj,0.0000035517,0.05000,3.705
10,self_attn.k_proj,0.0000056646,0.05000,4.156
10,self_attn.q_proj,0.0000214887,0.05000,4.234
10,self_attn.v_proj,0.0000053920,0.05000,4.256
10,self_attn.o_proj,0.0000015159,0.05000,1.484
10,mlp.gate_proj,0.0000876355,0.05000,2.018
10,mlp.up_proj,0.0000582840,0.05000,2.034
10,mlp.down_proj,0.0000028648,0.05000,3.671
11,self_attn.k_proj,0.0000029401,0.05000,4.225
11,self_attn.v_proj,0.0000024203,0.05000,4.303
11,self_attn.q_proj,0.0000103068,0.05000,4.329
11,self_attn.o_proj,0.0000008888,0.05000,1.416
11,mlp.gate_proj,0.0000741166,0.05000,1.917
11,mlp.up_proj,0.0000566025,0.05000,1.941
11,mlp.down_proj,0.0000030860,0.05000,3.775
12,self_attn.k_proj,0.0000038472,0.05000,4.211
12,self_attn.q_proj,0.0000137759,0.05000,4.249
12,self_attn.v_proj,0.0000034272,0.05000,4.280
12,self_attn.o_proj,0.0000018546,0.05000,1.426
12,mlp.up_proj,0.0000580738,0.05000,2.071
12,mlp.gate_proj,0.0000685725,0.05000,2.094
12,mlp.down_proj,0.0000032841,0.05000,3.679
13,self_attn.v_proj,0.0000025200,0.05000,4.194
13,self_attn.q_proj,0.0000111981,0.05000,4.230
13,self_attn.k_proj,0.0000030891,0.05000,4.257
13,self_attn.o_proj,0.0000013833,0.05000,1.412
13,mlp.gate_proj,0.0000692568,0.05000,1.954
13,mlp.up_proj,0.0000647811,0.05000,1.973
13,mlp.down_proj,0.0000043834,0.05000,3.687
14,self_attn.q_proj,0.0000175752,0.05000,4.123
14,self_attn.k_proj,0.0000045994,0.05000,4.169
14,self_attn.v_proj,0.0000043383,0.05000,4.177
14,self_attn.o_proj,0.0000019435,0.05000,1.440
14,mlp.up_proj,0.0000720079,0.05000,1.933
14,mlp.gate_proj,0.0000766432,0.05000,1.958
14,mlp.down_proj,0.0000047673,0.05000,3.682
15,self_attn.k_proj,0.0000054351,0.05000,4.116
15,self_attn.v_proj,0.0000046581,0.05000,4.200
15,self_attn.q_proj,0.0000211606,0.05000,4.223
15,self_attn.o_proj,0.0000018997,0.05000,1.429
15,mlp.gate_proj,0.0000754266,0.05000,1.990
15,mlp.up_proj,0.0000740122,0.05000,2.015
15,mlp.down_proj,0.0000053597,0.05000,3.737
16,self_attn.q_proj,0.0000386672,0.05000,4.067
16,self_attn.k_proj,0.0000093139,0.05000,4.130
16,self_attn.v_proj,0.0000103499,0.05000,4.155
16,self_attn.o_proj,0.0000026699,0.05000,1.403
16,mlp.up_proj,0.0000855751,0.05000,2.041
16,mlp.gate_proj,0.0000930978,0.05000,2.061
16,mlp.down_proj,0.0000275581,0.05000,3.699
17,self_attn.q_proj,0.0000392438,0.05000,4.191
17,self_attn.k_proj,0.0000084165,0.05000,4.234
17,self_attn.v_proj,0.0000087300,0.05000,4.253
17,self_attn.o_proj,0.0000026301,0.05000,1.434
17,mlp.gate_proj,0.0000779856,0.05000,2.005
17,mlp.up_proj,0.0000724824,0.05000,2.024
17,mlp.down_proj,0.0000052021,0.05000,3.706
18,self_attn.q_proj,0.0000359746,0.05000,4.202
18,self_attn.v_proj,0.0000089155,0.05000,4.239
18,self_attn.k_proj,0.0000086172,0.05000,4.263
18,self_attn.o_proj,0.0000026857,0.05000,1.403
18,mlp.gate_proj,0.0000826436,0.05000,1.962
18,mlp.up_proj,0.0000786649,0.05000,1.977
18,mlp.down_proj,0.0000061125,0.05000,3.797
19,self_attn.k_proj,0.0000154321,0.05000,4.153
19,self_attn.v_proj,0.0000145288,0.05000,4.226
19,self_attn.q_proj,0.0000669893,0.05000,4.257
19,self_attn.o_proj,0.0000072475,0.05000,1.420
19,mlp.up_proj,0.0000763673,0.05000,1.958
19,mlp.gate_proj,0.0000795653,0.05000,1.983
19,mlp.down_proj,0.0000063999,0.05000,3.722
20,self_attn.k_proj,0.0000139938,0.05000,4.166
20,self_attn.v_proj,0.0000126008,0.05000,4.255
20,self_attn.q_proj,0.0000648620,0.05000,4.270
20,self_attn.o_proj,0.0000058633,0.05000,1.471
20,mlp.gate_proj,0.0000927177,0.05000,1.983
20,mlp.up_proj,0.0000909301,0.05000,2.005
20,mlp.down_proj,0.0000085226,0.05000,3.687
21,self_attn.v_proj,0.0000157023,0.05000,4.200
21,self_attn.q_proj,0.0000732686,0.05000,4.246
21,self_attn.k_proj,0.0000162576,0.05000,4.279
21,self_attn.o_proj,0.0000064938,0.05000,1.451
21,mlp.up_proj,0.0000938207,0.05000,2.061
21,mlp.gate_proj,0.0000953118,0.05000,2.069
21,mlp.down_proj,0.0000094180,0.05000,3.670
22,self_attn.v_proj,0.0000290647,0.05000,4.262
22,self_attn.q_proj,0.0001376551,0.05000,4.291
22,self_attn.k_proj,0.0000316722,0.05000,4.321
22,self_attn.o_proj,0.0000139498,0.05000,1.408
22,mlp.up_proj,0.0001028317,0.05000,1.960
22,mlp.gate_proj,0.0001130452,0.05000,1.982
22,mlp.down_proj,0.0000150741,0.05000,3.654
23,self_attn.q_proj,0.0001271593,0.05000,4.130
23,self_attn.k_proj,0.0000288629,0.05000,4.167
23,self_attn.v_proj,0.0000286624,0.05000,4.196
23,self_attn.o_proj,0.0000121074,0.05000,1.435
23,mlp.up_proj,0.0001123853,0.05000,1.978
23,mlp.gate_proj,0.0001318009,0.05000,2.002
23,mlp.down_proj,0.0000158443,0.05000,3.680
24,self_attn.k_proj,0.0000445231,0.05000,4.180
24,self_attn.v_proj,0.0000517156,0.05000,4.218
24,self_attn.q_proj,0.0001876024,0.05000,4.237
24,self_attn.o_proj,0.0000197756,0.05000,1.443
24,mlp.gate_proj,0.0001390896,0.05000,2.028
24,mlp.up_proj,0.0001163446,0.05000,2.052
24,mlp.down_proj,0.0000183784,0.05000,3.721
25,self_attn.k_proj,0.0000309183,0.05000,4.098
25,self_attn.v_proj,0.0000312976,0.05000,4.159
25,self_attn.q_proj,0.0001220958,0.05000,4.188
25,self_attn.o_proj,0.0000078953,0.05000,1.390
25,mlp.gate_proj,0.0001551162,0.05000,1.971
25,mlp.up_proj,0.0001274080,0.05000,1.993
25,mlp.down_proj,0.0000235486,0.05000,3.673
26,self_attn.v_proj,0.0000490624,0.05000,4.113
26,self_attn.k_proj,0.0000464167,0.05000,4.161
26,self_attn.q_proj,0.0001926069,0.05000,4.185
26,self_attn.o_proj,0.0000066098,0.05000,1.387
26,mlp.gate_proj,0.0001770693,0.05000,1.976
26,mlp.up_proj,0.0001508838,0.05000,1.975
26,mlp.down_proj,0.0000279487,0.05000,3.647
27,self_attn.k_proj,0.0000556290,0.05000,4.077
27,self_attn.q_proj,0.0002357567,0.05000,4.141
27,self_attn.v_proj,0.0000654981,0.05000,4.174
27,self_attn.o_proj,0.0000107555,0.05000,1.416
27,mlp.gate_proj,0.0001936240,0.05000,1.989
27,mlp.up_proj,0.0001708205,0.05000,2.008
27,mlp.down_proj,0.0000438858,0.05000,3.659
28,self_attn.k_proj,0.0000705007,0.05000,4.196
28,self_attn.v_proj,0.0000715719,0.05000,4.249
28,self_attn.q_proj,0.0002718625,0.05000,4.266
28,self_attn.o_proj,0.0000191249,0.05000,1.394
28,mlp.gate_proj,0.0002182023,0.05000,2.015
28,mlp.up_proj,0.0002019346,0.05000,2.031
28,mlp.down_proj,0.0000732407,0.05000,3.797
29,self_attn.k_proj,0.0001816301,0.05000,4.092
29,self_attn.q_proj,0.0007383766,0.05000,4.169
29,self_attn.v_proj,0.0002443326,0.05000,4.195
29,self_attn.o_proj,0.0000448207,0.05000,1.481
29,mlp.up_proj,0.0002562076,0.05000,1.942
29,mlp.gate_proj,0.0002613911,0.05000,1.958
29,mlp.down_proj,0.0000526463,0.05000,3.700
30,self_attn.v_proj,0.0002320336,0.05000,4.041
30,self_attn.k_proj,0.0001986943,0.05000,4.134
30,self_attn.q_proj,0.0007668271,0.05000,4.142
30,self_attn.o_proj,0.0000478573,0.05000,1.459
30,mlp.gate_proj,0.0002683302,0.05000,1.898
30,mlp.up_proj,0.0002727380,0.05000,1.920
30,mlp.down_proj,0.0000663494,0.05000,3.662
31,self_attn.q_proj,0.0010675234,0.05000,4.039
31,self_attn.k_proj,0.0003015756,0.05000,4.116
31,self_attn.v_proj,0.0003918752,0.05000,4.162
31,self_attn.o_proj,0.0000612622,0.05000,1.394
31,mlp.up_proj,0.0002870251,0.05000,2.027
31,mlp.gate_proj,0.0002682364,0.05000,2.032
31,mlp.down_proj,0.0000840589,0.05000,3.717
32,self_attn.q_proj,0.0016584295,0.05000,4.132
32,self_attn.v_proj,0.0006243234,0.05000,4.233
32,self_attn.k_proj,0.0004182623,0.05000,4.253
32,self_attn.o_proj,0.0001057172,0.05000,1.428
32,mlp.up_proj,0.0003151746,0.05000,1.983
32,mlp.gate_proj,0.0002878729,0.05000,2.006
32,mlp.down_proj,0.0001186975,0.05000,3.678
33,self_attn.v_proj,0.0016297800,0.05000,4.071
33,self_attn.k_proj,0.0008791048,0.05000,4.100
33,self_attn.q_proj,0.0041213692,0.05000,4.145
33,self_attn.o_proj,0.0001997127,0.05000,1.434
33,mlp.gate_proj,0.0003185476,0.05000,1.865
33,mlp.up_proj,0.0003529478,0.05000,1.881
33,mlp.down_proj,0.0001623388,0.05000,3.669
34,self_attn.q_proj,0.0027944569,0.05000,4.110
34,self_attn.k_proj,0.0006274358,0.05000,4.194
34,self_attn.v_proj,0.0009633384,0.05000,4.209
34,self_attn.o_proj,0.0002593519,0.05000,1.436
34,mlp.up_proj,0.0004195679,0.05000,1.947
34,mlp.gate_proj,0.0004068270,0.05000,1.970
34,mlp.down_proj,0.0003025797,0.05000,3.683
35,self_attn.v_proj,0.0003734207,0.05000,4.091
35,self_attn.k_proj,0.0003153036,0.05000,4.159
35,self_attn.q_proj,0.0011829730,0.05000,4.173
35,self_attn.o_proj,0.0001886140,0.05000,1.407
35,mlp.gate_proj,0.0009205786,0.05000,1.941
35,mlp.up_proj,0.0009714007,0.05000,1.939
35,mlp.down_proj,0.0008959190,0.05000,3.653
|