File size: 10,845 Bytes
3121222 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | layer,module,loss,samples,damp,time
0,mlp.gate_proj,0.0000019575,0.05000,2.282
0,mlp.up_proj,0.0000027544,0.05000,2.295
0,mlp.down_proj,0.0000010484,0.05000,3.485
0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.133
0,self_attn.k_proj,0.0000001276,0.05000,1.896
0,self_attn.q_proj,0.0000026756,0.05000,1.900
0,self_attn.v_proj,0.0000002852,0.05000,1.926
0,self_attn.o_gate,0.0000056989,0.05000,0.416
1,mlp.up_proj,0.0000019776,0.05000,2.523
1,mlp.gate_proj,0.0000018764,0.05000,2.584
1,mlp.down_proj,0.0000005803,0.05000,3.835
1,self_attn.v_proj,0.0000274517,0.05000,3.357
1,self_attn.o_proj,0.0000006033,0.05000,3.382
1,self_attn.q_proj,0.0000212897,0.05000,3.414
1,self_attn.k_proj,0.0000216100,0.05000,3.420
2,mlp.gate_proj,0.0000023568,0.05000,2.246
2,mlp.up_proj,0.0000023672,0.05000,2.256
2,mlp.down_proj,0.0000010073,0.05000,3.446
2,self_attn.q_proj,0.0000107203,0.05000,2.823
2,self_attn.v_proj,0.0000126155,0.05000,2.848
2,self_attn.o_proj,0.0000002600,0.05000,2.851
2,self_attn.k_proj,0.0000106647,0.05000,2.854
3,mlp.up_proj,0.0000042655,0.05000,2.150
3,mlp.gate_proj,0.0000048281,0.05000,2.155
3,mlp.down_proj,0.0000014951,0.05000,3.343
3,self_attn.v_proj,0.0000143808,0.05000,2.827
3,self_attn.q_proj,0.0000133468,0.05000,2.856
3,self_attn.o_proj,0.0000003673,0.05000,2.865
3,self_attn.k_proj,0.0000136954,0.05000,2.868
4,mlp.up_proj,0.0000057488,0.05000,2.549
4,mlp.gate_proj,0.0000063297,0.05000,2.552
4,mlp.down_proj,0.0000016230,0.05000,3.892
4,self_attn.q_proj,0.0000106056,0.05000,3.481
4,self_attn.v_proj,0.0000128426,0.05000,3.494
4,self_attn.k_proj,0.0000108418,0.05000,3.509
4,self_attn.o_proj,0.0000002972,0.05000,3.519
5,mlp.up_proj,0.0000071429,0.05000,2.613
5,mlp.gate_proj,0.0000078490,0.05000,2.635
5,mlp.down_proj,0.0000017132,0.05000,3.949
5,self_attn.v_proj,0.0000129791,0.05000,3.126
5,self_attn.k_proj,0.0000121584,0.05000,3.347
5,self_attn.q_proj,0.0000115177,0.05000,3.356
5,self_attn.o_proj,0.0000004817,0.05000,3.377
6,mlp.up_proj,0.0000088197,0.05000,2.675
6,mlp.gate_proj,0.0000095654,0.05000,2.748
6,mlp.down_proj,0.0000019187,0.05000,3.957
6,self_attn.o_proj,0.0000004831,0.05000,2.738
6,self_attn.q_proj,0.0000111541,0.05000,2.795
6,self_attn.k_proj,0.0000133217,0.05000,2.824
6,self_attn.v_proj,0.0000116766,0.05000,2.834
7,mlp.up_proj,0.0000092302,0.05000,1.289
7,mlp.gate_proj,0.0000097952,0.05000,1.293
7,mlp.down_proj,0.0000022242,0.05000,2.475
7,self_attn.v_proj,0.0000109299,0.05000,2.075
7,self_attn.k_proj,0.0000121253,0.05000,2.156
7,self_attn.q_proj,0.0000099677,0.05000,2.286
7,self_attn.o_proj,0.0000005160,0.05000,2.289
8,mlp.gate_proj,0.0000104388,0.05000,1.974
8,mlp.up_proj,0.0000095090,0.05000,2.055
8,mlp.down_proj,0.0000021816,0.05000,3.211
8,self_attn.k_proj,0.0000085051,0.05000,2.614
8,self_attn.v_proj,0.0000075190,0.05000,2.743
8,self_attn.o_proj,0.0000008019,0.05000,2.750
8,self_attn.q_proj,0.0000069249,0.05000,2.772
9,mlp.gate_proj,0.0000114569,0.05000,1.817
9,mlp.up_proj,0.0000109181,0.05000,1.822
9,mlp.down_proj,0.0000028727,0.05000,3.040
9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.095
9,self_attn.v_proj,0.0000005232,0.05000,1.285
9,self_attn.k_proj,0.0000008164,0.05000,1.304
9,self_attn.q_proj,0.0000084581,0.05000,1.328
9,self_attn.o_gate,0.0000059179,0.05000,0.404
10,mlp.up_proj,0.0000110046,0.05000,1.756
10,mlp.gate_proj,0.0000113558,0.05000,1.940
10,mlp.down_proj,0.0000026231,0.05000,3.068
10,self_attn.q_proj,0.0000101732,0.05000,2.230
10,self_attn.o_proj,0.0000007549,0.05000,2.262
10,self_attn.k_proj,0.0000127262,0.05000,2.365
10,self_attn.v_proj,0.0000108463,0.05000,2.387
11,mlp.up_proj,0.0000111043,0.05000,2.183
11,mlp.gate_proj,0.0000110840,0.05000,2.187
11,mlp.down_proj,0.0000028018,0.05000,3.373
11,self_attn.v_proj,0.0000078623,0.05000,2.592
11,self_attn.k_proj,0.0000090132,0.05000,2.733
11,self_attn.o_proj,0.0000008556,0.05000,2.759
11,self_attn.q_proj,0.0000073984,0.05000,2.772
12,mlp.gate_proj,0.0000107157,0.05000,2.134
12,mlp.up_proj,0.0000109496,0.05000,2.144
12,mlp.down_proj,0.0000028660,0.05000,3.327
12,self_attn.v_proj,0.0000108852,0.05000,2.999
12,self_attn.q_proj,0.0000093560,0.05000,3.022
12,self_attn.o_proj,0.0000010866,0.05000,3.033
12,self_attn.k_proj,0.0000102958,0.05000,3.039
13,mlp.up_proj,0.0000113037,0.05000,2.134
13,mlp.gate_proj,0.0000108564,0.05000,2.150
13,mlp.down_proj,0.0000030354,0.05000,3.340
13,self_attn.o_proj,0.0000011258,0.05000,2.630
13,self_attn.q_proj,0.0000071202,0.05000,2.637
13,self_attn.v_proj,0.0000072887,0.05000,2.655
13,self_attn.k_proj,0.0000090965,0.05000,2.658
14,mlp.gate_proj,0.0000102864,0.05000,2.116
14,mlp.up_proj,0.0000112935,0.05000,2.131
14,mlp.down_proj,0.0000033973,0.05000,3.311
14,self_attn.q_proj,0.0000077342,0.05000,2.791
14,self_attn.k_proj,0.0000088883,0.05000,2.801
14,self_attn.o_proj,0.0000014352,0.05000,2.806
14,self_attn.v_proj,0.0000076372,0.05000,2.810
15,mlp.up_proj,0.0000109415,0.05000,2.113
15,mlp.gate_proj,0.0000106577,0.05000,2.117
15,mlp.down_proj,0.0000029832,0.05000,3.304
15,self_attn.v_proj,0.0000053197,0.05000,2.740
15,self_attn.k_proj,0.0000064845,0.05000,2.750
15,self_attn.q_proj,0.0000050206,0.05000,2.756
15,self_attn.o_proj,0.0000019126,0.05000,2.758
16,mlp.up_proj,0.0000153010,0.05000,2.529
16,mlp.gate_proj,0.0000156530,0.05000,2.533
16,mlp.down_proj,0.0000057073,0.05000,3.721
16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.100
16,self_attn.q_proj,0.0000078978,0.05000,1.962
16,self_attn.k_proj,0.0000007718,0.05000,1.963
16,self_attn.v_proj,0.0000004404,0.05000,1.968
16,self_attn.o_gate,0.0000046133,0.05000,0.430
17,mlp.gate_proj,0.0000172837,0.05000,2.243
17,mlp.up_proj,0.0000157615,0.05000,2.252
17,mlp.down_proj,0.0000047822,0.05000,3.442
17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.103
17,self_attn.q_proj,0.0000081782,0.05000,1.955
17,self_attn.k_proj,0.0000007155,0.05000,1.958
17,self_attn.v_proj,0.0000006458,0.05000,1.977
17,self_attn.o_gate,0.0000054807,0.05000,0.410
18,mlp.up_proj,0.0000136515,0.05000,2.710
18,mlp.gate_proj,0.0000141853,0.05000,2.718
18,mlp.down_proj,0.0000036975,0.05000,3.911
18,self_attn.q_proj,0.0000070243,0.05000,3.069
18,self_attn.k_proj,0.0000099378,0.05000,3.079
18,self_attn.v_proj,0.0000070944,0.05000,3.091
18,self_attn.o_proj,0.0000013901,0.05000,3.093
19,mlp.up_proj,0.0000140069,0.05000,2.184
19,mlp.gate_proj,0.0000137057,0.05000,2.188
19,mlp.down_proj,0.0000041433,0.05000,3.382
19,self_attn.v_proj,0.0000092466,0.05000,2.955
19,self_attn.k_proj,0.0000115108,0.05000,2.976
19,self_attn.q_proj,0.0000087993,0.05000,2.978
19,self_attn.o_proj,0.0000013312,0.05000,2.981
20,mlp.gate_proj,0.0000136300,0.05000,2.360
20,mlp.up_proj,0.0000144002,0.05000,2.367
20,mlp.down_proj,0.0000054458,0.05000,3.551
20,self_attn.k_proj,0.0000093558,0.05000,2.430
20,self_attn.v_proj,0.0000069220,0.05000,2.485
20,self_attn.q_proj,0.0000069259,0.05000,2.488
20,self_attn.o_proj,0.0000021510,0.05000,2.491
21,mlp.up_proj,0.0000136941,0.05000,1.901
21,mlp.gate_proj,0.0000127453,0.05000,2.069
21,mlp.down_proj,0.0000064656,0.05000,3.211
21,self_attn.o_proj,0.0000032435,0.05000,2.564
21,self_attn.k_proj,0.0000082270,0.05000,2.652
21,self_attn.q_proj,0.0000072292,0.05000,2.698
21,self_attn.v_proj,0.0000077253,0.05000,2.702
22,mlp.gate_proj,0.0000178435,0.05000,2.226
22,mlp.up_proj,0.0000191504,0.05000,2.237
22,mlp.down_proj,0.0000115692,0.05000,3.429
22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.105
22,self_attn.q_proj,0.0000098795,0.05000,1.564
22,self_attn.v_proj,0.0000011001,0.05000,1.603
22,self_attn.k_proj,0.0000006362,0.05000,1.802
22,self_attn.o_gate,0.0000052467,0.05000,0.556
23,mlp.up_proj,0.0000205944,0.05000,2.568
23,mlp.gate_proj,0.0000191698,0.05000,2.574
23,mlp.down_proj,0.0000114501,0.05000,3.838
23,self_attn.k_proj,0.0000123230,0.05000,3.183
23,self_attn.v_proj,0.0000092833,0.05000,3.241
23,self_attn.q_proj,0.0000096182,0.05000,3.242
23,self_attn.o_proj,0.0000029686,0.05000,3.246
24,mlp.up_proj,0.0000243043,0.05000,2.174
24,mlp.gate_proj,0.0000224462,0.05000,2.178
24,mlp.down_proj,0.0000185971,0.05000,3.367
24,self_attn.o_proj,0.0000049888,0.05000,2.975
24,self_attn.v_proj,0.0000083148,0.05000,2.986
24,self_attn.k_proj,0.0000101886,0.05000,2.996
24,self_attn.q_proj,0.0000081873,0.05000,3.004
25,mlp.gate_proj,0.0000260207,0.05000,2.247
25,mlp.up_proj,0.0000286292,0.05000,2.265
25,mlp.down_proj,0.0000327358,0.05000,3.458
25,self_attn.k_proj,0.0000109056,0.05000,3.008
25,self_attn.o_proj,0.0000064882,0.05000,3.029
25,self_attn.v_proj,0.0000084491,0.05000,3.036
25,self_attn.q_proj,0.0000085720,0.05000,3.038
26,mlp.gate_proj,0.0000290269,0.05000,2.488
26,mlp.up_proj,0.0000328542,0.05000,2.502
26,mlp.down_proj,0.0000283380,0.05000,3.690
26,self_attn.o_proj,0.0000122008,0.05000,2.982
26,self_attn.k_proj,0.0000195720,0.05000,2.992
26,self_attn.q_proj,0.0000137304,0.05000,2.997
26,self_attn.v_proj,0.0000134189,0.05000,2.999
27,mlp.up_proj,0.0000379276,0.05000,2.567
27,mlp.gate_proj,0.0000329030,0.05000,2.567
27,mlp.down_proj,0.0000369176,0.05000,3.824
27,self_attn.v_proj,0.0000120097,0.05000,2.916
27,self_attn.q_proj,0.0000128456,0.05000,2.921
27,self_attn.o_proj,0.0000076826,0.05000,2.924
27,self_attn.k_proj,0.0000141221,0.05000,2.928
28,mlp.gate_proj,0.0000373178,0.05000,2.244
28,mlp.up_proj,0.0000440524,0.05000,2.267
28,mlp.down_proj,0.0000514231,0.05000,3.471
28,self_attn.k_proj,0.0000228180,0.05000,3.043
28,self_attn.v_proj,0.0000169691,0.05000,3.054
28,self_attn.q_proj,0.0000164989,0.05000,3.059
28,self_attn.o_proj,0.0000238898,0.05000,3.062
29,mlp.up_proj,0.0000564963,0.05000,2.573
29,mlp.gate_proj,0.0000464832,0.05000,2.598
29,mlp.down_proj,0.0000851531,0.05000,3.805
29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.113
29,self_attn.v_proj,0.0000107679,0.05000,2.037
29,self_attn.q_proj,0.0000211549,0.05000,2.046
29,self_attn.k_proj,0.0000011045,0.05000,2.075
29,self_attn.o_gate,0.0000234260,0.05000,0.413
30,mlp.gate_proj,0.0000589105,0.05000,2.553
30,mlp.up_proj,0.0000711039,0.05000,2.561
30,mlp.down_proj,0.0001752567,0.05000,3.852
30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.099
30,self_attn.k_proj,0.0000012830,0.05000,2.137
30,self_attn.q_proj,0.0000324827,0.05000,2.140
30,self_attn.v_proj,0.0000334367,0.05000,2.190
30,self_attn.o_gate,0.0000373244,0.05000,0.436
31,mlp.up_proj,0.0001038903,0.05000,2.291
31,mlp.gate_proj,0.0000930895,0.05000,2.361
31,mlp.down_proj,0.0007491196,0.05000,3.548
31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.107
31,self_attn.k_proj,0.0000008259,0.05000,2.147
31,self_attn.v_proj,0.0000051840,0.05000,2.153
31,self_attn.q_proj,0.0000221746,0.05000,2.155
31,self_attn.o_gate,0.0000232037,0.05000,0.409
|