File size: 9,067 Bytes
c1709c7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | layer,module,loss,samples,damp,time
0,self_attn.k_proj,0.0000066317,0.05000,5.604
0,self_attn.v_proj,0.0000003162,0.05000,5.657
0,self_attn.q_proj,0.0000131101,0.05000,5.664
0,self_attn.o_proj,0.0000000105,0.05000,1.036
0,mlp.up_proj,0.0000043037,0.05000,2.764
0,mlp.gate_proj,0.0000049660,0.05000,2.821
0,mlp.down_proj,0.0000000337,0.05000,3.033
1,self_attn.q_proj,0.0000134517,0.05000,5.363
1,self_attn.k_proj,0.0000079466,0.05000,5.439
1,self_attn.v_proj,0.0000008457,0.05000,5.470
1,self_attn.o_proj,0.0000000309,0.05000,1.609
1,mlp.up_proj,0.0000054533,0.05000,2.981
1,mlp.gate_proj,0.0000062155,0.05000,3.048
1,mlp.down_proj,0.0000013564,0.05000,3.028
2,self_attn.v_proj,0.0000031549,0.05000,5.545
2,self_attn.k_proj,0.0000328911,0.05000,5.608
2,self_attn.q_proj,0.0000544763,0.05000,5.630
2,self_attn.o_proj,0.0000000260,0.05000,1.056
2,mlp.up_proj,0.0000075630,0.05000,2.302
2,mlp.gate_proj,0.0000090424,0.05000,2.328
2,mlp.down_proj,0.0000000819,0.05000,2.929
3,self_attn.v_proj,0.0000024446,0.05000,5.036
3,self_attn.q_proj,0.0000284173,0.05000,5.095
3,self_attn.k_proj,0.0000160375,0.05000,5.110
3,self_attn.o_proj,0.0000000395,0.05000,0.981
3,mlp.up_proj,0.0000099246,0.05000,2.359
3,mlp.gate_proj,0.0000134570,0.05000,2.377
3,mlp.down_proj,0.0000001285,0.05000,2.936
4,self_attn.q_proj,0.0000308824,0.05000,4.856
4,self_attn.k_proj,0.0000161503,0.05000,4.937
4,self_attn.v_proj,0.0000027480,0.05000,4.945
4,self_attn.o_proj,0.0000001472,0.05000,1.006
4,mlp.up_proj,0.0000110954,0.05000,2.308
4,mlp.gate_proj,0.0000169268,0.05000,2.318
4,mlp.down_proj,0.0000002153,0.05000,2.895
5,self_attn.k_proj,0.0000247132,0.05000,4.874
5,self_attn.v_proj,0.0000025206,0.05000,4.912
5,self_attn.q_proj,0.0000401912,0.05000,4.949
5,self_attn.o_proj,0.0000002016,0.05000,1.024
5,mlp.up_proj,0.0000116495,0.05000,2.398
5,mlp.gate_proj,0.0000167069,0.05000,2.427
5,mlp.down_proj,0.0000002504,0.05000,2.914
6,self_attn.q_proj,0.0000297058,0.05000,4.908
6,self_attn.v_proj,0.0000020855,0.05000,4.930
6,self_attn.k_proj,0.0000157571,0.05000,4.957
6,self_attn.o_proj,0.0000002040,0.05000,0.995
6,mlp.gate_proj,0.0000173706,0.05000,2.219
6,mlp.up_proj,0.0000121039,0.05000,2.235
6,mlp.down_proj,0.0000002704,0.05000,2.928
7,self_attn.v_proj,0.0000017357,0.05000,4.881
7,self_attn.q_proj,0.0000230821,0.05000,4.937
7,self_attn.k_proj,0.0000135448,0.05000,4.972
7,self_attn.o_proj,0.0000003555,0.05000,1.008
7,mlp.up_proj,0.0000111421,0.05000,2.348
7,mlp.gate_proj,0.0000147502,0.05000,2.371
7,mlp.down_proj,0.0000002646,0.05000,2.943
8,self_attn.v_proj,0.0000018656,0.05000,4.844
8,self_attn.k_proj,0.0000156979,0.05000,4.940
8,self_attn.q_proj,0.0000260248,0.05000,4.954
8,self_attn.o_proj,0.0000002554,0.05000,1.032
8,mlp.gate_proj,0.0000151389,0.05000,2.306
8,mlp.up_proj,0.0000111552,0.05000,2.328
8,mlp.down_proj,0.0000002605,0.05000,2.917
9,self_attn.k_proj,0.0000143108,0.05000,4.860
9,self_attn.q_proj,0.0000242022,0.05000,4.928
9,self_attn.v_proj,0.0000023695,0.05000,4.949
9,self_attn.o_proj,0.0000003200,0.05000,1.004
9,mlp.gate_proj,0.0000136877,0.05000,2.331
9,mlp.up_proj,0.0000103751,0.05000,2.352
9,mlp.down_proj,0.0000002325,0.05000,3.009
10,self_attn.k_proj,0.0000141120,0.05000,4.792
10,self_attn.v_proj,0.0000016791,0.05000,4.899
10,self_attn.q_proj,0.0000224765,0.05000,4.928
10,self_attn.o_proj,0.0000003049,0.05000,0.988
10,mlp.gate_proj,0.0000133799,0.05000,2.362
10,mlp.up_proj,0.0000108373,0.05000,2.385
10,mlp.down_proj,0.0000002523,0.05000,2.923
11,self_attn.v_proj,0.0000018817,0.05000,4.830
11,self_attn.k_proj,0.0000098131,0.05000,4.870
11,self_attn.q_proj,0.0000172427,0.05000,4.893
11,self_attn.o_proj,0.0000003372,0.05000,1.023
11,mlp.up_proj,0.0000116445,0.05000,2.369
11,mlp.gate_proj,0.0000139223,0.05000,2.385
11,mlp.down_proj,0.0000002836,0.05000,2.904
12,self_attn.k_proj,0.0000158544,0.05000,4.890
12,self_attn.v_proj,0.0000021823,0.05000,4.930
12,self_attn.q_proj,0.0000265778,0.05000,4.956
12,self_attn.o_proj,0.0000004284,0.05000,1.025
12,mlp.up_proj,0.0000118799,0.05000,2.475
12,mlp.gate_proj,0.0000142177,0.05000,2.500
12,mlp.down_proj,0.0000003101,0.05000,2.907
13,self_attn.q_proj,0.0000267522,0.05000,4.748
13,self_attn.k_proj,0.0000171389,0.05000,4.842
13,self_attn.v_proj,0.0000025023,0.05000,4.862
13,self_attn.o_proj,0.0000003842,0.05000,1.012
13,mlp.up_proj,0.0000133185,0.05000,2.264
13,mlp.gate_proj,0.0000171810,0.05000,2.266
13,mlp.down_proj,0.0000004323,0.05000,2.890
14,self_attn.v_proj,0.0000028622,0.05000,4.864
14,self_attn.k_proj,0.0000135327,0.05000,4.916
14,self_attn.q_proj,0.0000299978,0.05000,4.935
14,self_attn.o_proj,0.0000006734,0.05000,1.056
14,mlp.gate_proj,0.0000172195,0.05000,2.320
14,mlp.up_proj,0.0000132037,0.05000,2.334
14,mlp.down_proj,0.0000004415,0.05000,2.914
15,self_attn.v_proj,0.0000026275,0.05000,4.870
15,self_attn.q_proj,0.0000276026,0.05000,4.926
15,self_attn.k_proj,0.0000143523,0.05000,4.975
15,self_attn.o_proj,0.0000004534,0.05000,1.019
15,mlp.up_proj,0.0000123739,0.05000,2.333
15,mlp.gate_proj,0.0000171509,0.05000,2.347
15,mlp.down_proj,0.0000004026,0.05000,2.899
16,self_attn.v_proj,0.0000027276,0.05000,4.838
16,self_attn.k_proj,0.0000147427,0.05000,4.913
16,self_attn.q_proj,0.0000258635,0.05000,4.952
16,self_attn.o_proj,0.0000002295,0.05000,1.028
16,mlp.up_proj,0.0000119168,0.05000,2.310
16,mlp.gate_proj,0.0000168227,0.05000,2.335
16,mlp.down_proj,0.0000003651,0.05000,2.977
17,self_attn.k_proj,0.0000133368,0.05000,4.867
17,self_attn.v_proj,0.0000026553,0.05000,4.914
17,self_attn.q_proj,0.0000245140,0.05000,4.957
17,self_attn.o_proj,0.0000001955,0.05000,1.024
17,mlp.up_proj,0.0000118180,0.05000,2.375
17,mlp.gate_proj,0.0000168303,0.05000,2.379
17,mlp.down_proj,0.0000003665,0.05000,2.947
18,self_attn.k_proj,0.0000160067,0.05000,4.867
18,self_attn.q_proj,0.0000288517,0.05000,4.866
18,self_attn.v_proj,0.0000034172,0.05000,4.914
18,self_attn.o_proj,0.0000001835,0.05000,1.004
18,mlp.gate_proj,0.0000170495,0.05000,2.334
18,mlp.up_proj,0.0000122552,0.05000,2.338
18,mlp.down_proj,0.0000003414,0.05000,2.982
19,self_attn.q_proj,0.0000253104,0.05000,4.907
19,self_attn.k_proj,0.0000146613,0.05000,4.945
19,self_attn.v_proj,0.0000032850,0.05000,4.955
19,self_attn.o_proj,0.0000002477,0.05000,1.016
19,mlp.up_proj,0.0000119743,0.05000,2.340
19,mlp.gate_proj,0.0000164426,0.05000,2.355
19,mlp.down_proj,0.0000004131,0.05000,2.901
20,self_attn.k_proj,0.0000152328,0.05000,4.858
20,self_attn.v_proj,0.0000038224,0.05000,4.910
20,self_attn.q_proj,0.0000253048,0.05000,4.936
20,self_attn.o_proj,0.0000002420,0.05000,1.045
20,mlp.up_proj,0.0000115511,0.05000,2.373
20,mlp.gate_proj,0.0000150089,0.05000,2.387
20,mlp.down_proj,0.0000003303,0.05000,2.899
21,self_attn.v_proj,0.0000050870,0.05000,4.867
21,self_attn.q_proj,0.0000256488,0.05000,4.907
21,self_attn.k_proj,0.0000151884,0.05000,4.927
21,self_attn.o_proj,0.0000002438,0.05000,0.994
21,mlp.gate_proj,0.0000151286,0.05000,2.249
21,mlp.up_proj,0.0000114986,0.05000,2.266
21,mlp.down_proj,0.0000003446,0.05000,2.943
22,self_attn.v_proj,0.0000047463,0.05000,4.865
22,self_attn.k_proj,0.0000134800,0.05000,4.898
22,self_attn.q_proj,0.0000235887,0.05000,4.917
22,self_attn.o_proj,0.0000002382,0.05000,1.044
22,mlp.up_proj,0.0000117654,0.05000,2.241
22,mlp.gate_proj,0.0000155538,0.05000,2.263
22,mlp.down_proj,0.0000003936,0.05000,2.922
23,self_attn.q_proj,0.0000251569,0.05000,4.883
23,self_attn.k_proj,0.0000155190,0.05000,4.924
23,self_attn.v_proj,0.0000047481,0.05000,4.948
23,self_attn.o_proj,0.0000002655,0.05000,0.997
23,mlp.up_proj,0.0000127930,0.05000,2.364
23,mlp.gate_proj,0.0000180163,0.05000,2.367
23,mlp.down_proj,0.0000005834,0.05000,2.914
24,self_attn.v_proj,0.0000067991,0.05000,4.847
24,self_attn.k_proj,0.0000161390,0.05000,4.889
24,self_attn.q_proj,0.0000259698,0.05000,4.929
24,self_attn.o_proj,0.0000007835,0.05000,1.002
24,mlp.up_proj,0.0000131828,0.05000,2.274
24,mlp.gate_proj,0.0000190814,0.05000,2.282
24,mlp.down_proj,0.0000006735,0.05000,2.910
25,self_attn.q_proj,0.0000204543,0.05000,4.784
25,self_attn.v_proj,0.0000046968,0.05000,4.855
25,self_attn.k_proj,0.0000104107,0.05000,4.872
25,self_attn.o_proj,0.0000006803,0.05000,1.025
25,mlp.up_proj,0.0000139469,0.05000,2.367
25,mlp.gate_proj,0.0000205439,0.05000,2.381
25,mlp.down_proj,0.0000011351,0.05000,3.000
26,self_attn.v_proj,0.0000073377,0.05000,4.803
26,self_attn.q_proj,0.0000218441,0.05000,4.932
26,self_attn.k_proj,0.0000136581,0.05000,4.972
26,self_attn.o_proj,0.0000008289,0.05000,1.021
26,mlp.up_proj,0.0000149937,0.05000,2.296
26,mlp.gate_proj,0.0000223784,0.05000,2.312
26,mlp.down_proj,0.0000026421,0.05000,2.993
27,self_attn.q_proj,0.0000155607,0.05000,4.809
27,self_attn.v_proj,0.0000045720,0.05000,4.870
27,self_attn.k_proj,0.0000087533,0.05000,4.896
27,self_attn.o_proj,0.0000035290,0.05000,1.047
27,mlp.up_proj,0.0000168011,0.05000,2.397
27,mlp.gate_proj,0.0000220308,0.05000,2.413
27,mlp.down_proj,0.0000244650,0.05000,2.948
|