File size: 9,067 Bytes
layer,module,loss,damp,time
0,self_attn.k_proj,0.0000074878,0.05000,5.659
0,self_attn.q_proj,0.0000147233,0.05000,5.662
0,self_attn.v_proj,0.0000004164,0.05000,5.716
0,self_attn.o_proj,0.0000000188,0.05000,1.023
0,mlp.up_proj,0.0000073224,0.05000,2.317
0,mlp.gate_proj,0.0000083827,0.05000,2.350
0,mlp.down_proj,0.0000001028,0.05000,2.886
1,self_attn.k_proj,0.0000103207,0.05000,5.596
1,self_attn.q_proj,0.0000175984,0.05000,5.661
1,self_attn.v_proj,0.0000011597,0.05000,5.641
1,self_attn.o_proj,0.0000000697,0.05000,1.044
1,mlp.up_proj,0.0000099559,0.05000,2.788
1,mlp.gate_proj,0.0000113476,0.05000,2.853
1,mlp.down_proj,0.0000025962,0.05000,3.085
2,self_attn.q_proj,0.0000739263,0.05000,5.511
2,self_attn.k_proj,0.0000441155,0.05000,5.576
2,self_attn.v_proj,0.0000044463,0.05000,5.601
2,self_attn.o_proj,0.0000000672,0.05000,1.023
2,mlp.gate_proj,0.0000187338,0.05000,2.298
2,mlp.up_proj,0.0000158255,0.05000,2.314
2,mlp.down_proj,0.0000003259,0.05000,3.034
3,self_attn.k_proj,0.0000269141,0.05000,4.896
3,self_attn.v_proj,0.0000044659,0.05000,4.894
3,self_attn.q_proj,0.0000484370,0.05000,4.953
3,self_attn.o_proj,0.0000001267,0.05000,1.037
3,mlp.gate_proj,0.0000267179,0.05000,2.337
3,mlp.up_proj,0.0000199634,0.05000,2.363
3,mlp.down_proj,0.0000004599,0.05000,2.975
4,self_attn.k_proj,0.0000258326,0.05000,4.882
4,self_attn.v_proj,0.0000047591,0.05000,4.884
4,self_attn.q_proj,0.0000493107,0.05000,4.944
4,self_attn.o_proj,0.0000001978,0.05000,1.044
4,mlp.gate_proj,0.0000357187,0.05000,2.398
4,mlp.up_proj,0.0000239637,0.05000,2.397
4,mlp.down_proj,0.0000006565,0.05000,2.979
5,self_attn.k_proj,0.0000427736,0.05000,4.861
5,self_attn.q_proj,0.0000706749,0.05000,4.928
5,self_attn.v_proj,0.0000049181,0.05000,4.948
5,self_attn.o_proj,0.0000002486,0.05000,1.045
5,mlp.gate_proj,0.0000385495,0.05000,2.308
5,mlp.up_proj,0.0000273058,0.05000,2.328
5,mlp.down_proj,0.0000008979,0.05000,3.011
6,self_attn.k_proj,0.0000308670,0.05000,4.806
6,self_attn.q_proj,0.0000581398,0.05000,4.846
6,self_attn.v_proj,0.0000045627,0.05000,4.885
6,self_attn.o_proj,0.0000003760,0.05000,1.026
6,mlp.up_proj,0.0000291855,0.05000,2.248
6,mlp.gate_proj,0.0000411316,0.05000,2.276
6,mlp.down_proj,0.0000010291,0.05000,3.053
7,self_attn.v_proj,0.0000040204,0.05000,4.955
7,self_attn.q_proj,0.0000492176,0.05000,5.028
7,self_attn.k_proj,0.0000289958,0.05000,5.047
7,self_attn.o_proj,0.0000005308,0.05000,1.029
7,mlp.up_proj,0.0000311719,0.05000,2.240
7,mlp.gate_proj,0.0000403425,0.05000,2.263
7,mlp.down_proj,0.0000011793,0.05000,2.982
8,self_attn.q_proj,0.0000629039,0.05000,4.885
8,self_attn.v_proj,0.0000050514,0.05000,4.884
8,self_attn.k_proj,0.0000379682,0.05000,4.907
8,self_attn.o_proj,0.0000006740,0.05000,1.021
8,mlp.gate_proj,0.0000429643,0.05000,2.269
8,mlp.up_proj,0.0000325408,0.05000,2.289
8,mlp.down_proj,0.0000012385,0.05000,2.952
9,self_attn.k_proj,0.0000347599,0.05000,4.724
9,self_attn.v_proj,0.0000061279,0.05000,4.800
9,self_attn.q_proj,0.0000586902,0.05000,4.826
9,self_attn.o_proj,0.0000007288,0.05000,1.020
9,mlp.up_proj,0.0000325744,0.05000,2.317
9,mlp.gate_proj,0.0000417335,0.05000,2.339
9,mlp.down_proj,0.0000012440,0.05000,2.978
10,self_attn.k_proj,0.0000367776,0.05000,4.779
10,self_attn.v_proj,0.0000047437,0.05000,4.837
10,self_attn.q_proj,0.0000589483,0.05000,4.863
10,self_attn.o_proj,0.0000007045,0.05000,1.006
10,mlp.up_proj,0.0000355694,0.05000,2.252
10,mlp.gate_proj,0.0000426607,0.05000,2.269
10,mlp.down_proj,0.0000014252,0.05000,2.945
11,self_attn.v_proj,0.0000056424,0.05000,4.732
11,self_attn.k_proj,0.0000276094,0.05000,4.769
11,self_attn.q_proj,0.0000490194,0.05000,4.803
11,self_attn.o_proj,0.0000008918,0.05000,1.040
11,mlp.gate_proj,0.0000447233,0.05000,2.266
11,mlp.up_proj,0.0000384965,0.05000,2.282
11,mlp.down_proj,0.0000016130,0.05000,2.992
12,self_attn.v_proj,0.0000062276,0.05000,4.821
12,self_attn.q_proj,0.0000707292,0.05000,4.844
12,self_attn.k_proj,0.0000422481,0.05000,4.875
12,self_attn.o_proj,0.0000010586,0.05000,1.013
12,mlp.up_proj,0.0000412534,0.05000,2.241
12,mlp.gate_proj,0.0000475553,0.05000,2.264
12,mlp.down_proj,0.0000018156,0.05000,2.954
13,self_attn.q_proj,0.0000746849,0.05000,4.776
13,self_attn.k_proj,0.0000481807,0.05000,4.811
13,self_attn.v_proj,0.0000073153,0.05000,4.843
13,self_attn.o_proj,0.0000012458,0.05000,1.019
13,mlp.up_proj,0.0000458390,0.05000,2.347
13,mlp.gate_proj,0.0000565005,0.05000,2.372
13,mlp.down_proj,0.0000024355,0.05000,2.979
14,self_attn.k_proj,0.0000378307,0.05000,4.768
14,self_attn.q_proj,0.0000819438,0.05000,4.881
14,self_attn.v_proj,0.0000085098,0.05000,4.895
14,self_attn.o_proj,0.0000014740,0.05000,1.037
14,mlp.up_proj,0.0000496886,0.05000,2.248
14,mlp.gate_proj,0.0000617393,0.05000,2.267
14,mlp.down_proj,0.0000029373,0.05000,2.928
15,self_attn.v_proj,0.0000085283,0.05000,4.719
15,self_attn.k_proj,0.0000434987,0.05000,4.767
15,self_attn.q_proj,0.0000836970,0.05000,4.813
15,self_attn.o_proj,0.0000009290,0.05000,1.022
15,mlp.gate_proj,0.0000665099,0.05000,2.268
15,mlp.up_proj,0.0000498216,0.05000,2.299
15,mlp.down_proj,0.0000030499,0.05000,2.931
16,self_attn.q_proj,0.0000871859,0.05000,4.742
16,self_attn.k_proj,0.0000490420,0.05000,4.803
16,self_attn.v_proj,0.0000096564,0.05000,4.838
16,self_attn.o_proj,0.0000006811,0.05000,1.031
16,mlp.gate_proj,0.0000679251,0.05000,2.348
16,mlp.up_proj,0.0000497399,0.05000,2.371
16,mlp.down_proj,0.0000029397,0.05000,2.928
17,self_attn.q_proj,0.0000832187,0.05000,4.610
17,self_attn.v_proj,0.0000093413,0.05000,4.761
17,self_attn.k_proj,0.0000452292,0.05000,4.788
17,self_attn.o_proj,0.0000006960,0.05000,1.055
17,mlp.up_proj,0.0000515798,0.05000,2.270
17,mlp.gate_proj,0.0000716405,0.05000,2.285
17,mlp.down_proj,0.0000032355,0.05000,2.951
18,self_attn.v_proj,0.0000116876,0.05000,4.763
18,self_attn.k_proj,0.0000521751,0.05000,4.815
18,self_attn.q_proj,0.0000943415,0.05000,4.836
18,self_attn.o_proj,0.0000007594,0.05000,1.024
18,mlp.gate_proj,0.0000745484,0.05000,2.296
18,mlp.up_proj,0.0000550380,0.05000,2.313
18,mlp.down_proj,0.0000034218,0.05000,2.943
19,self_attn.q_proj,0.0000838091,0.05000,4.770
19,self_attn.v_proj,0.0000114893,0.05000,4.822
19,self_attn.k_proj,0.0000487867,0.05000,4.842
19,self_attn.o_proj,0.0000009830,0.05000,1.065
19,mlp.gate_proj,0.0000804051,0.05000,2.317
19,mlp.up_proj,0.0000600660,0.05000,2.329
19,mlp.down_proj,0.0000042528,0.05000,2.947
20,self_attn.q_proj,0.0000873402,0.05000,4.741
20,self_attn.v_proj,0.0000137784,0.05000,4.810
20,self_attn.k_proj,0.0000528894,0.05000,4.846
20,self_attn.o_proj,0.0000007206,0.05000,1.018
20,mlp.gate_proj,0.0000781699,0.05000,2.336
20,mlp.up_proj,0.0000613666,0.05000,2.355
20,mlp.down_proj,0.0000041109,0.05000,2.994
21,self_attn.q_proj,0.0000868639,0.05000,4.764
21,self_attn.k_proj,0.0000519479,0.05000,4.812
21,self_attn.v_proj,0.0000178721,0.05000,4.847
21,self_attn.o_proj,0.0000008751,0.05000,1.000
21,mlp.up_proj,0.0000650703,0.05000,2.354
21,mlp.gate_proj,0.0000835892,0.05000,2.381
21,mlp.down_proj,0.0000043956,0.05000,2.923
22,self_attn.v_proj,0.0000177718,0.05000,4.708
22,self_attn.k_proj,0.0000496925,0.05000,4.778
22,self_attn.q_proj,0.0000848907,0.05000,4.811
22,self_attn.o_proj,0.0000007073,0.05000,1.012
22,mlp.up_proj,0.0000702587,0.05000,2.325
22,mlp.gate_proj,0.0000909208,0.05000,2.340
22,mlp.down_proj,0.0000049920,0.05000,2.933
23,self_attn.v_proj,0.0000174651,0.05000,4.746
23,self_attn.q_proj,0.0000855378,0.05000,4.796
23,self_attn.k_proj,0.0000534188,0.05000,4.820
23,self_attn.o_proj,0.0000011784,0.05000,1.016
23,mlp.gate_proj,0.0001065173,0.05000,2.276
23,mlp.up_proj,0.0000778496,0.05000,2.298
23,mlp.down_proj,0.0000061454,0.05000,2.960
24,self_attn.v_proj,0.0000258239,0.05000,4.777
24,self_attn.k_proj,0.0000595539,0.05000,4.808
24,self_attn.q_proj,0.0000942610,0.05000,4.838
24,self_attn.o_proj,0.0000017130,0.05000,1.006
24,mlp.up_proj,0.0000840494,0.05000,2.249
24,mlp.gate_proj,0.0001173177,0.05000,2.264
24,mlp.down_proj,0.0000069910,0.05000,2.990
25,self_attn.q_proj,0.0000879538,0.05000,4.745
25,self_attn.k_proj,0.0000456781,0.05000,4.820
25,self_attn.v_proj,0.0000218952,0.05000,4.851
25,self_attn.o_proj,0.0000017481,0.05000,1.007
25,mlp.gate_proj,0.0001278075,0.05000,2.386
25,mlp.up_proj,0.0000906748,0.05000,2.420
25,mlp.down_proj,0.0000087553,0.05000,2.930
26,self_attn.v_proj,0.0000302323,0.05000,4.811
26,self_attn.q_proj,0.0000829504,0.05000,4.829
26,self_attn.k_proj,0.0000538115,0.05000,4.855
26,self_attn.o_proj,0.0000024757,0.05000,1.023
26,mlp.up_proj,0.0000947583,0.05000,2.376
26,mlp.gate_proj,0.0001355632,0.05000,2.382
26,mlp.down_proj,0.0000126745,0.05000,3.007
27,self_attn.k_proj,0.0000373252,0.05000,4.704
27,self_attn.v_proj,0.0000200833,0.05000,4.782
27,self_attn.q_proj,0.0000657733,0.05000,4.821
27,self_attn.o_proj,0.0000077384,0.05000,1.004
27,mlp.up_proj,0.0001052698,0.05000,2.324
27,mlp.gate_proj,0.0001328537,0.05000,2.335
27,mlp.down_proj,0.0000376315,0.05000,2.955
|