File size: 10,845 Bytes
aa09e08 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | layer,module,loss,samples,damp,time
0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.365
0,self_attn.v_proj,0.0000002852,0.05000,2.313
0,self_attn.k_proj,0.0000001276,0.05000,2.319
0,self_attn.q_proj,0.0000026754,0.05000,2.338
0,self_attn.o_gate,0.0000056994,0.05000,0.734
0,mlp.up_proj,0.0000029969,0.05000,2.312
0,mlp.gate_proj,0.0000021178,0.05000,2.376
0,mlp.down_proj,0.0000014851,0.05000,4.314
1,self_attn.q_proj,0.0000214327,0.05000,3.077
1,self_attn.o_proj,0.0000006074,0.05000,3.104
1,self_attn.k_proj,0.0000217632,0.05000,3.117
1,self_attn.v_proj,0.0000276329,0.05000,3.120
1,mlp.gate_proj,0.0000019046,0.05000,2.313
1,mlp.up_proj,0.0000020071,0.05000,2.323
1,mlp.down_proj,0.0000005985,0.05000,4.283
2,self_attn.v_proj,0.0000127656,0.05000,3.089
2,self_attn.q_proj,0.0000108437,0.05000,3.128
2,self_attn.k_proj,0.0000107873,0.05000,3.132
2,self_attn.o_proj,0.0000002624,0.05000,3.137
2,mlp.gate_proj,0.0000023777,0.05000,2.511
2,mlp.up_proj,0.0000023888,0.05000,2.534
2,mlp.down_proj,0.0000010156,0.05000,4.452
3,self_attn.q_proj,0.0000134180,0.05000,3.093
3,self_attn.k_proj,0.0000137554,0.05000,3.125
3,self_attn.v_proj,0.0000144534,0.05000,3.134
3,self_attn.o_proj,0.0000003675,0.05000,3.141
3,mlp.gate_proj,0.0000048571,0.05000,2.296
3,mlp.up_proj,0.0000042930,0.05000,2.320
3,mlp.down_proj,0.0000015000,0.05000,4.277
4,self_attn.v_proj,0.0000128837,0.05000,3.061
4,self_attn.k_proj,0.0000108806,0.05000,3.094
4,self_attn.q_proj,0.0000106413,0.05000,3.105
4,self_attn.o_proj,0.0000002981,0.05000,3.112
4,mlp.up_proj,0.0000057718,0.05000,2.497
4,mlp.gate_proj,0.0000063566,0.05000,2.513
4,mlp.down_proj,0.0000016298,0.05000,4.424
5,self_attn.q_proj,0.0000115468,0.05000,3.022
5,self_attn.o_proj,0.0000004831,0.05000,3.064
5,self_attn.v_proj,0.0000130083,0.05000,3.067
5,self_attn.k_proj,0.0000121797,0.05000,3.070
5,mlp.gate_proj,0.0000078714,0.05000,2.288
5,mlp.up_proj,0.0000071630,0.05000,2.318
5,mlp.down_proj,0.0000017172,0.05000,4.243
6,self_attn.v_proj,0.0000116934,0.05000,3.043
6,self_attn.k_proj,0.0000133436,0.05000,3.057
6,self_attn.q_proj,0.0000111725,0.05000,3.073
6,self_attn.o_proj,0.0000004840,0.05000,3.078
6,mlp.gate_proj,0.0000095918,0.05000,2.280
6,mlp.up_proj,0.0000088430,0.05000,2.283
6,mlp.down_proj,0.0000019229,0.05000,4.203
7,self_attn.q_proj,0.0000099772,0.05000,3.036
7,self_attn.o_proj,0.0000005164,0.05000,3.066
7,self_attn.v_proj,0.0000109389,0.05000,3.073
7,self_attn.k_proj,0.0000121498,0.05000,3.076
7,mlp.gate_proj,0.0000098175,0.05000,2.284
7,mlp.up_proj,0.0000092494,0.05000,2.305
7,mlp.down_proj,0.0000022285,0.05000,4.234
8,self_attn.v_proj,0.0000075188,0.05000,3.149
8,self_attn.o_proj,0.0000008020,0.05000,3.157
8,self_attn.q_proj,0.0000069240,0.05000,3.163
8,self_attn.k_proj,0.0000085086,0.05000,3.165
8,mlp.up_proj,0.0000095218,0.05000,2.345
8,mlp.gate_proj,0.0000104520,0.05000,2.375
8,mlp.down_proj,0.0000021824,0.05000,4.301
9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.097
9,self_attn.v_proj,0.0000005230,0.05000,1.976
9,self_attn.k_proj,0.0000008173,0.05000,1.999
9,self_attn.q_proj,0.0000084612,0.05000,2.002
9,self_attn.o_gate,0.0000059209,0.05000,0.657
9,mlp.up_proj,0.0000109480,0.05000,2.284
9,mlp.gate_proj,0.0000114849,0.05000,2.299
9,mlp.down_proj,0.0000028129,0.05000,4.222
10,self_attn.q_proj,0.0000101656,0.05000,3.081
10,self_attn.k_proj,0.0000127152,0.05000,3.111
10,self_attn.v_proj,0.0000108378,0.05000,3.122
10,self_attn.o_proj,0.0000007537,0.05000,3.125
10,mlp.up_proj,0.0000110120,0.05000,2.622
10,mlp.gate_proj,0.0000113623,0.05000,2.642
10,mlp.down_proj,0.0000026269,0.05000,4.580
11,self_attn.v_proj,0.0000078631,0.05000,3.096
11,self_attn.o_proj,0.0000008522,0.05000,3.131
11,self_attn.q_proj,0.0000073996,0.05000,3.135
11,self_attn.k_proj,0.0000090127,0.05000,3.138
11,mlp.up_proj,0.0000111150,0.05000,2.283
11,mlp.gate_proj,0.0000110942,0.05000,2.297
11,mlp.down_proj,0.0000028067,0.05000,4.212
12,self_attn.q_proj,0.0000093563,0.05000,2.901
12,self_attn.o_proj,0.0000010849,0.05000,2.929
12,self_attn.v_proj,0.0000108863,0.05000,2.937
12,self_attn.k_proj,0.0000103044,0.05000,2.945
12,mlp.gate_proj,0.0000107282,0.05000,2.498
12,mlp.up_proj,0.0000109626,0.05000,2.522
12,mlp.down_proj,0.0000028672,0.05000,4.469
13,self_attn.v_proj,0.0000072914,0.05000,2.986
13,self_attn.k_proj,0.0000091078,0.05000,3.025
13,self_attn.o_proj,0.0000011265,0.05000,3.029
13,self_attn.q_proj,0.0000071241,0.05000,3.033
13,mlp.up_proj,0.0000113235,0.05000,2.353
13,mlp.gate_proj,0.0000108775,0.05000,2.361
13,mlp.down_proj,0.0000030396,0.05000,4.278
14,self_attn.q_proj,0.0000077357,0.05000,2.863
14,self_attn.o_proj,0.0000014332,0.05000,2.888
14,self_attn.k_proj,0.0000088931,0.05000,2.890
14,self_attn.v_proj,0.0000076432,0.05000,2.892
14,mlp.gate_proj,0.0000103080,0.05000,2.610
14,mlp.up_proj,0.0000113153,0.05000,2.619
14,mlp.down_proj,0.0000034036,0.05000,4.571
15,self_attn.v_proj,0.0000053299,0.05000,2.866
15,self_attn.o_proj,0.0000019097,0.05000,2.893
15,self_attn.k_proj,0.0000064892,0.05000,2.896
15,self_attn.q_proj,0.0000050327,0.05000,2.903
15,mlp.up_proj,0.0000109781,0.05000,2.230
15,mlp.gate_proj,0.0000106926,0.05000,2.242
15,mlp.down_proj,0.0000029907,0.05000,4.144
16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.097
16,self_attn.k_proj,0.0000007730,0.05000,1.877
16,self_attn.v_proj,0.0000004408,0.05000,1.895
16,self_attn.q_proj,0.0000079157,0.05000,1.906
16,self_attn.o_gate,0.0000046221,0.05000,0.658
16,mlp.up_proj,0.0000137838,0.05000,2.611
16,mlp.gate_proj,0.0000140992,0.05000,2.626
16,mlp.down_proj,0.0000044268,0.05000,4.544
17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.101
17,self_attn.v_proj,0.0000006478,0.05000,1.965
17,self_attn.k_proj,0.0000007183,0.05000,1.973
17,self_attn.q_proj,0.0000081989,0.05000,1.978
17,self_attn.o_gate,0.0000054954,0.05000,0.656
17,mlp.up_proj,0.0000150867,0.05000,2.610
17,mlp.gate_proj,0.0000165420,0.05000,2.632
17,mlp.down_proj,0.0000043397,0.05000,4.550
18,self_attn.q_proj,0.0000070497,0.05000,3.084
18,self_attn.o_proj,0.0000013994,0.05000,3.107
18,self_attn.v_proj,0.0000071203,0.05000,3.117
18,self_attn.k_proj,0.0000099659,0.05000,3.124
18,mlp.gate_proj,0.0000142639,0.05000,2.629
18,mlp.up_proj,0.0000137292,0.05000,2.666
18,mlp.down_proj,0.0000037223,0.05000,4.590
19,self_attn.v_proj,0.0000092909,0.05000,2.909
19,self_attn.k_proj,0.0000115645,0.05000,2.937
19,self_attn.q_proj,0.0000088469,0.05000,2.947
19,self_attn.o_proj,0.0000013371,0.05000,2.955
19,mlp.up_proj,0.0000140925,0.05000,2.559
19,mlp.gate_proj,0.0000137925,0.05000,2.579
19,mlp.down_proj,0.0000041741,0.05000,4.491
20,self_attn.q_proj,0.0000069671,0.05000,2.973
20,self_attn.o_proj,0.0000021632,0.05000,3.001
20,self_attn.k_proj,0.0000094110,0.05000,3.024
20,self_attn.v_proj,0.0000069630,0.05000,3.029
20,mlp.gate_proj,0.0000137304,0.05000,2.482
20,mlp.up_proj,0.0000145074,0.05000,2.509
20,mlp.down_proj,0.0000054941,0.05000,4.432
21,self_attn.v_proj,0.0000077745,0.05000,2.941
21,self_attn.k_proj,0.0000082770,0.05000,2.965
21,self_attn.q_proj,0.0000072783,0.05000,2.979
21,self_attn.o_proj,0.0000032530,0.05000,2.983
21,mlp.up_proj,0.0000138135,0.05000,2.564
21,mlp.gate_proj,0.0000128529,0.05000,2.599
21,mlp.down_proj,0.0000065183,0.05000,4.596
22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.095
22,self_attn.q_proj,0.0000099461,0.05000,1.938
22,self_attn.k_proj,0.0000006408,0.05000,1.975
22,self_attn.v_proj,0.0000011084,0.05000,1.992
22,self_attn.o_gate,0.0000052852,0.05000,0.681
22,mlp.gate_proj,0.0000165691,0.05000,2.439
22,mlp.up_proj,0.0000177798,0.05000,2.445
22,mlp.down_proj,0.0000097134,0.05000,4.381
23,self_attn.o_proj,0.0000029966,0.05000,3.032
23,self_attn.q_proj,0.0000097131,0.05000,3.045
23,self_attn.k_proj,0.0000124430,0.05000,3.058
23,self_attn.v_proj,0.0000093706,0.05000,3.072
23,mlp.gate_proj,0.0000193721,0.05000,2.553
23,mlp.up_proj,0.0000208169,0.05000,2.603
23,mlp.down_proj,0.0000116066,0.05000,4.526
24,self_attn.v_proj,0.0000083977,0.05000,3.006
24,self_attn.q_proj,0.0000082685,0.05000,3.034
24,self_attn.k_proj,0.0000102885,0.05000,3.048
24,self_attn.o_proj,0.0000050330,0.05000,3.051
24,mlp.up_proj,0.0000245762,0.05000,2.473
24,mlp.gate_proj,0.0000227001,0.05000,2.480
24,mlp.down_proj,0.0000188121,0.05000,4.402
25,self_attn.q_proj,0.0000086529,0.05000,2.938
25,self_attn.v_proj,0.0000085304,0.05000,2.963
25,self_attn.o_proj,0.0000065483,0.05000,2.971
25,self_attn.k_proj,0.0000110142,0.05000,2.979
25,mlp.gate_proj,0.0000263007,0.05000,2.514
25,mlp.up_proj,0.0000289325,0.05000,2.557
25,mlp.down_proj,0.0000330309,0.05000,4.460
26,self_attn.v_proj,0.0000135194,0.05000,2.966
26,self_attn.k_proj,0.0000197288,0.05000,2.985
26,self_attn.q_proj,0.0000138416,0.05000,2.996
26,self_attn.o_proj,0.0000122633,0.05000,3.003
26,mlp.gate_proj,0.0000293281,0.05000,2.578
26,mlp.up_proj,0.0000331938,0.05000,2.618
26,mlp.down_proj,0.0000286714,0.05000,4.516
27,self_attn.q_proj,0.0000129731,0.05000,3.048
27,self_attn.o_proj,0.0000077469,0.05000,3.072
27,self_attn.v_proj,0.0000121207,0.05000,3.081
27,self_attn.k_proj,0.0000142424,0.05000,3.085
27,mlp.up_proj,0.0000383397,0.05000,2.579
27,mlp.gate_proj,0.0000332575,0.05000,2.599
27,mlp.down_proj,0.0000373985,0.05000,4.558
28,self_attn.k_proj,0.0000229849,0.05000,2.961
28,self_attn.o_proj,0.0000238381,0.05000,2.987
28,self_attn.v_proj,0.0000170875,0.05000,2.990
28,self_attn.q_proj,0.0000166143,0.05000,2.999
28,mlp.up_proj,0.0000445356,0.05000,2.436
28,mlp.gate_proj,0.0000377255,0.05000,2.483
28,mlp.down_proj,0.0000521327,0.05000,4.401
29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.103
29,self_attn.q_proj,0.0000212811,0.05000,1.898
29,self_attn.v_proj,0.0000108000,0.05000,1.928
29,self_attn.k_proj,0.0000011077,0.05000,1.941
29,self_attn.o_gate,0.0000235504,0.05000,0.674
29,mlp.up_proj,0.0000567625,0.05000,2.402
29,mlp.gate_proj,0.0000467000,0.05000,2.418
29,mlp.down_proj,0.0000849603,0.05000,4.344
30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.097
30,self_attn.q_proj,0.0000326366,0.05000,1.932
30,self_attn.v_proj,0.0000335974,0.05000,1.933
30,self_attn.k_proj,0.0000012899,0.05000,1.966
30,self_attn.o_gate,0.0000374967,0.05000,0.668
30,mlp.up_proj,0.0000729923,0.05000,3.208
30,mlp.gate_proj,0.0000604775,0.05000,3.370
30,mlp.down_proj,0.0001841053,0.05000,5.231
31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.112
31,self_attn.q_proj,0.0000223886,0.05000,2.671
31,self_attn.v_proj,0.0000052386,0.05000,2.788
31,self_attn.k_proj,0.0000008344,0.05000,2.925
31,self_attn.o_gate,0.0000234286,0.05000,0.822
31,mlp.up_proj,0.0001053831,0.05000,3.133
31,mlp.gate_proj,0.0000944571,0.05000,3.336
31,mlp.down_proj,0.0007573602,0.05000,5.207
|