File size: 10,845 Bytes
f6f0b6f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 | layer,module,loss,samples,damp,time
0,self_attn.o_proj,failsafe(rtn): 0.0018692,0.00000,0.138
0,self_attn.q_proj,0.0000025054,0.05000,1.933
0,self_attn.v_proj,0.0000002673,0.05000,1.967
0,self_attn.k_proj,0.0000001197,0.05000,1.976
0,self_attn.o_gate,0.0000053353,0.05000,0.394
0,mlp.up_proj,0.0000028122,0.05000,2.091
0,mlp.gate_proj,0.0000019878,0.05000,2.116
0,mlp.down_proj,0.0000014094,0.05000,3.193
1,self_attn.q_proj,0.0000198860,0.05000,2.327
1,self_attn.o_proj,0.0000005668,0.05000,2.348
1,self_attn.k_proj,0.0000202024,0.05000,2.356
1,self_attn.v_proj,0.0000256484,0.05000,2.360
1,mlp.up_proj,0.0000018768,0.05000,1.999
1,mlp.gate_proj,0.0000017809,0.05000,2.000
1,mlp.down_proj,0.0000005710,0.05000,3.093
2,self_attn.k_proj,0.0000100614,0.05000,2.109
2,self_attn.q_proj,0.0000101246,0.05000,2.183
2,self_attn.o_proj,0.0000002428,0.05000,2.218
2,self_attn.v_proj,0.0000119164,0.05000,2.239
2,mlp.gate_proj,0.0000022318,0.05000,1.951
2,mlp.up_proj,0.0000022416,0.05000,2.021
2,mlp.down_proj,0.0000009728,0.05000,3.080
3,self_attn.q_proj,0.0000125645,0.05000,2.153
3,self_attn.v_proj,0.0000135237,0.05000,2.170
3,self_attn.o_proj,0.0000003379,0.05000,2.181
3,self_attn.k_proj,0.0000128744,0.05000,2.214
3,mlp.gate_proj,0.0000045511,0.05000,1.978
3,mlp.up_proj,0.0000040209,0.05000,2.037
3,mlp.down_proj,0.0000014397,0.05000,3.115
4,self_attn.q_proj,0.0000099334,0.05000,2.707
4,self_attn.v_proj,0.0000120185,0.05000,2.720
4,self_attn.o_proj,0.0000002742,0.05000,2.739
4,self_attn.k_proj,0.0000101482,0.05000,2.742
4,mlp.up_proj,0.0000054010,0.05000,1.751
4,mlp.gate_proj,0.0000059484,0.05000,1.881
4,mlp.down_proj,0.0000015523,0.05000,2.930
5,self_attn.q_proj,0.0000107690,0.05000,2.388
5,self_attn.v_proj,0.0000121319,0.05000,2.565
5,self_attn.k_proj,0.0000113703,0.05000,2.571
5,self_attn.o_proj,0.0000004462,0.05000,2.596
5,mlp.gate_proj,0.0000073629,0.05000,2.027
5,mlp.up_proj,0.0000066999,0.05000,2.040
5,mlp.down_proj,0.0000016318,0.05000,3.135
6,self_attn.k_proj,0.0000124642,0.05000,2.605
6,self_attn.q_proj,0.0000104139,0.05000,2.671
6,self_attn.o_proj,0.0000004455,0.05000,2.673
6,self_attn.v_proj,0.0000109036,0.05000,2.687
6,mlp.up_proj,0.0000082335,0.05000,2.047
6,mlp.gate_proj,0.0000089300,0.05000,2.052
6,mlp.down_proj,0.0000018200,0.05000,3.152
7,self_attn.k_proj,0.0000113131,0.05000,2.759
7,self_attn.v_proj,0.0000101746,0.05000,2.777
7,self_attn.o_proj,0.0000004802,0.05000,2.778
7,self_attn.q_proj,0.0000092834,0.05000,2.784
7,mlp.up_proj,0.0000086123,0.05000,2.016
7,mlp.gate_proj,0.0000091405,0.05000,2.034
7,mlp.down_proj,0.0000021072,0.05000,3.119
8,self_attn.o_proj,0.0000007475,0.05000,2.422
8,self_attn.v_proj,0.0000070005,0.05000,2.433
8,self_attn.q_proj,0.0000064478,0.05000,2.495
8,self_attn.k_proj,0.0000079277,0.05000,2.502
8,mlp.up_proj,0.0000088850,0.05000,3.785
8,mlp.gate_proj,0.0000097536,0.05000,3.857
8,mlp.down_proj,0.0000020750,0.05000,6.063
9,self_attn.o_proj,failsafe(rtn): 0.0020905,0.00000,0.102
9,self_attn.q_proj,0.0000078441,0.05000,1.728
9,self_attn.k_proj,0.0000007585,0.05000,1.730
9,self_attn.v_proj,0.0000004848,0.05000,1.765
9,self_attn.o_gate,0.0000054848,0.05000,0.385
9,mlp.gate_proj,0.0000107061,0.05000,2.022
9,mlp.up_proj,0.0000102051,0.05000,2.036
9,mlp.down_proj,0.0000026359,0.05000,3.114
10,self_attn.o_proj,0.0000006941,0.05000,2.442
10,self_attn.k_proj,0.0000118222,0.05000,2.463
10,self_attn.q_proj,0.0000094431,0.05000,2.511
10,self_attn.v_proj,0.0000100697,0.05000,2.516
10,mlp.up_proj,0.0000102459,0.05000,1.732
10,mlp.gate_proj,0.0000105735,0.05000,1.865
10,mlp.down_proj,0.0000024634,0.05000,2.896
11,self_attn.q_proj,0.0000068717,0.05000,2.337
11,self_attn.o_proj,0.0000007806,0.05000,2.350
11,self_attn.v_proj,0.0000072992,0.05000,2.433
11,self_attn.k_proj,0.0000083771,0.05000,2.435
11,mlp.gate_proj,0.0000102914,0.05000,2.056
11,mlp.up_proj,0.0000103112,0.05000,2.066
11,mlp.down_proj,0.0000026183,0.05000,3.164
12,self_attn.k_proj,0.0000095307,0.05000,2.719
12,self_attn.o_proj,0.0000009991,0.05000,2.808
12,self_attn.q_proj,0.0000086598,0.05000,2.811
12,self_attn.v_proj,0.0000100696,0.05000,2.829
12,mlp.up_proj,0.0000101439,0.05000,2.039
12,mlp.gate_proj,0.0000099271,0.05000,2.045
12,mlp.down_proj,0.0000026657,0.05000,3.134
13,self_attn.o_proj,0.0000010317,0.05000,2.791
13,self_attn.q_proj,0.0000065795,0.05000,2.790
13,self_attn.v_proj,0.0000067338,0.05000,2.802
13,self_attn.k_proj,0.0000084106,0.05000,2.804
13,mlp.up_proj,0.0000104438,0.05000,2.054
13,mlp.gate_proj,0.0000100330,0.05000,2.062
13,mlp.down_proj,0.0000028064,0.05000,3.163
14,self_attn.k_proj,0.0000081929,0.05000,2.847
14,self_attn.o_proj,0.0000013027,0.05000,2.863
14,self_attn.q_proj,0.0000071310,0.05000,2.869
14,self_attn.v_proj,0.0000070400,0.05000,2.872
14,mlp.gate_proj,0.0000094866,0.05000,1.740
14,mlp.up_proj,0.0000104130,0.05000,1.846
14,mlp.down_proj,0.0000031477,0.05000,2.910
15,self_attn.o_proj,0.0000017505,0.05000,2.218
15,self_attn.q_proj,0.0000046234,0.05000,2.263
15,self_attn.v_proj,0.0000049011,0.05000,2.275
15,self_attn.k_proj,0.0000059547,0.05000,2.337
15,mlp.gate_proj,0.0000098456,0.05000,1.961
15,mlp.up_proj,0.0000101065,0.05000,2.010
15,mlp.down_proj,0.0000027747,0.05000,3.079
16,self_attn.o_proj,failsafe(rtn): 0.0021057,0.00000,0.100
16,self_attn.q_proj,0.0000072823,0.05000,1.562
16,self_attn.v_proj,0.0000004060,0.05000,1.615
16,self_attn.k_proj,0.0000007123,0.05000,1.691
16,self_attn.o_gate,0.0000042507,0.05000,0.451
16,mlp.up_proj,0.0000126712,0.05000,1.675
16,mlp.gate_proj,0.0000129596,0.05000,1.690
16,mlp.down_proj,0.0000040638,0.05000,2.776
17,self_attn.o_proj,failsafe(rtn): 0.0021667,0.00000,0.101
17,self_attn.q_proj,0.0000075214,0.05000,1.732
17,self_attn.v_proj,0.0000005942,0.05000,1.763
17,self_attn.k_proj,0.0000006580,0.05000,1.767
17,self_attn.o_gate,0.0000050420,0.05000,0.386
17,mlp.gate_proj,0.0000151962,0.05000,1.814
17,mlp.up_proj,0.0000138615,0.05000,1.927
17,mlp.down_proj,0.0000039890,0.05000,2.982
18,self_attn.k_proj,0.0000091951,0.05000,2.866
18,self_attn.v_proj,0.0000065547,0.05000,2.887
18,self_attn.o_proj,0.0000012725,0.05000,2.897
18,self_attn.q_proj,0.0000064936,0.05000,2.902
18,mlp.up_proj,0.0000126479,0.05000,2.038
18,mlp.gate_proj,0.0000131430,0.05000,2.050
18,mlp.down_proj,0.0000034282,0.05000,3.146
19,self_attn.v_proj,0.0000085492,0.05000,4.782
19,self_attn.q_proj,0.0000081404,0.05000,4.843
19,self_attn.k_proj,0.0000106452,0.05000,4.891
19,self_attn.o_proj,0.0000012228,0.05000,4.896
19,mlp.gate_proj,0.0000127026,0.05000,2.054
19,mlp.up_proj,0.0000129798,0.05000,2.060
19,mlp.down_proj,0.0000038460,0.05000,3.153
20,self_attn.o_proj,0.0000019678,0.05000,2.087
20,self_attn.v_proj,0.0000063993,0.05000,2.091
20,self_attn.k_proj,0.0000086538,0.05000,2.108
20,self_attn.q_proj,0.0000064077,0.05000,2.143
20,mlp.gate_proj,0.0000126178,0.05000,2.050
20,mlp.up_proj,0.0000133294,0.05000,2.058
20,mlp.down_proj,0.0000050352,0.05000,3.156
21,self_attn.q_proj,0.0000066632,0.05000,2.744
21,self_attn.o_proj,0.0000029935,0.05000,2.773
21,self_attn.v_proj,0.0000071241,0.05000,2.776
21,self_attn.k_proj,0.0000075829,0.05000,2.777
21,mlp.gate_proj,0.0000117755,0.05000,2.060
21,mlp.up_proj,0.0000126541,0.05000,2.066
21,mlp.down_proj,0.0000059720,0.05000,3.155
22,self_attn.o_proj,failsafe(rtn): 0.0022430,0.00000,0.110
22,self_attn.q_proj,0.0000090866,0.05000,1.417
22,self_attn.v_proj,0.0000010102,0.05000,1.445
22,self_attn.k_proj,0.0000005854,0.05000,1.667
22,self_attn.o_gate,0.0000048253,0.05000,0.779
22,mlp.up_proj,0.0000161680,0.05000,2.067
22,mlp.gate_proj,0.0000150658,0.05000,2.070
22,mlp.down_proj,0.0000087456,0.05000,3.160
23,self_attn.v_proj,0.0000085384,0.05000,2.781
23,self_attn.k_proj,0.0000113444,0.05000,2.789
23,self_attn.q_proj,0.0000088479,0.05000,2.795
23,self_attn.o_proj,0.0000027456,0.05000,2.799
23,mlp.gate_proj,0.0000176874,0.05000,2.017
23,mlp.up_proj,0.0000190031,0.05000,2.016
23,mlp.down_proj,0.0000105173,0.05000,3.106
24,self_attn.o_proj,0.0000046072,0.05000,2.394
24,self_attn.v_proj,0.0000076524,0.05000,2.431
24,self_attn.q_proj,0.0000075387,0.05000,2.491
24,self_attn.k_proj,0.0000093884,0.05000,2.491
24,mlp.gate_proj,0.0000207282,0.05000,1.679
24,mlp.up_proj,0.0000224395,0.05000,1.695
24,mlp.down_proj,0.0000171202,0.05000,2.841
25,self_attn.q_proj,0.0000078854,0.05000,2.658
25,self_attn.o_proj,0.0000059637,0.05000,2.663
25,self_attn.v_proj,0.0000077710,0.05000,2.721
25,self_attn.k_proj,0.0000100391,0.05000,2.728
25,mlp.gate_proj,0.0000239999,0.05000,2.067
25,mlp.up_proj,0.0000264045,0.05000,2.088
25,mlp.down_proj,0.0000301652,0.05000,3.176
26,self_attn.q_proj,0.0000126346,0.05000,2.771
26,self_attn.v_proj,0.0000123492,0.05000,2.776
26,self_attn.o_proj,0.0000112689,0.05000,2.795
26,self_attn.k_proj,0.0000180101,0.05000,2.801
26,mlp.gate_proj,0.0000267217,0.05000,2.057
26,mlp.up_proj,0.0000302445,0.05000,2.072
26,mlp.down_proj,0.0000262709,0.05000,3.169
27,self_attn.k_proj,0.0000130042,0.05000,4.944
27,self_attn.v_proj,0.0000110681,0.05000,4.947
27,self_attn.q_proj,0.0000118468,0.05000,4.956
27,self_attn.o_proj,0.0000070263,0.05000,4.957
27,mlp.up_proj,0.0000350091,0.05000,2.034
27,mlp.gate_proj,0.0000303793,0.05000,2.047
27,mlp.down_proj,0.0000345169,0.05000,3.155
28,self_attn.v_proj,0.0000156234,0.05000,2.886
28,self_attn.o_proj,0.0000215157,0.05000,2.916
28,self_attn.q_proj,0.0000151926,0.05000,2.922
28,self_attn.k_proj,0.0000210312,0.05000,2.927
28,mlp.up_proj,0.0000407514,0.05000,1.989
28,mlp.gate_proj,0.0000345202,0.05000,2.030
28,mlp.down_proj,0.0000481835,0.05000,3.106
29,self_attn.o_proj,failsafe(rtn): 0.0025024,0.00000,0.085
29,self_attn.q_proj,0.0000194512,0.05000,1.404
29,self_attn.v_proj,0.0000098854,0.05000,1.465
29,self_attn.k_proj,0.0000010127,0.05000,1.586
29,self_attn.o_gate,0.0000215202,0.05000,0.486
29,mlp.gate_proj,0.0000427544,0.05000,1.730
29,mlp.up_proj,0.0000519517,0.05000,1.899
29,mlp.down_proj,0.0000784172,0.05000,2.944
30,self_attn.o_proj,failsafe(rtn): 0.0025940,0.00000,0.114
30,self_attn.q_proj,0.0000298560,0.05000,1.735
30,self_attn.v_proj,0.0000307847,0.05000,1.778
30,self_attn.k_proj,0.0000011827,0.05000,1.793
30,self_attn.o_gate,0.0000342893,0.05000,0.404
30,mlp.gate_proj,0.0000556245,0.05000,2.027
30,mlp.up_proj,0.0000671554,0.05000,2.046
30,mlp.down_proj,0.0001689895,0.05000,3.163
31,self_attn.o_proj,failsafe(rtn): 0.0024261,0.00000,0.110
31,self_attn.q_proj,0.0000205567,0.05000,1.736
31,self_attn.k_proj,0.0000007648,0.05000,1.774
31,self_attn.v_proj,0.0000048051,0.05000,1.782
31,self_attn.o_gate,0.0000215028,0.05000,0.393
31,mlp.gate_proj,0.0000875096,0.05000,3.228
31,mlp.up_proj,0.0000976488,0.05000,3.286
31,mlp.down_proj,0.0006907178,0.05000,5.370
|