File size: 9,067 Bytes
54d983f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 | layer,module,loss,samples,damp,time
0,self_attn.v_proj,0.0000003655,0.05000,6.135
0,self_attn.q_proj,0.0000139163,0.05000,6.251
0,self_attn.k_proj,0.0000070522,0.05000,6.287
0,self_attn.o_proj,0.0000000161,0.05000,1.438
0,mlp.up_proj,0.0000059081,0.05000,3.042
0,mlp.gate_proj,0.0000067841,0.05000,3.079
0,mlp.down_proj,0.0000000707,0.05000,3.574
1,self_attn.q_proj,0.0000158107,0.05000,6.250
1,self_attn.k_proj,0.0000093271,0.05000,6.342
1,self_attn.v_proj,0.0000009569,0.05000,6.403
1,self_attn.o_proj,0.0000000717,0.05000,1.268
1,mlp.gate_proj,0.0000085050,0.05000,2.909
1,mlp.up_proj,0.0000074407,0.05000,2.938
1,mlp.down_proj,0.0000020780,0.05000,3.624
2,self_attn.v_proj,0.0000035664,0.05000,6.128
2,self_attn.k_proj,0.0000384627,0.05000,6.183
2,self_attn.q_proj,0.0000636528,0.05000,6.217
2,self_attn.o_proj,0.0000000483,0.05000,1.279
2,mlp.gate_proj,0.0000133714,0.05000,2.891
2,mlp.up_proj,0.0000112456,0.05000,2.918
2,mlp.down_proj,0.0000001836,0.05000,3.592
3,self_attn.k_proj,0.0000209325,0.05000,6.104
3,self_attn.q_proj,0.0000374590,0.05000,6.210
3,self_attn.v_proj,0.0000032397,0.05000,6.243
3,self_attn.o_proj,0.0000000519,0.05000,1.242
3,mlp.gate_proj,0.0000199032,0.05000,2.985
3,mlp.up_proj,0.0000147765,0.05000,3.037
3,mlp.down_proj,0.0000002623,0.05000,3.622
4,self_attn.k_proj,0.0000209495,0.05000,6.247
4,self_attn.v_proj,0.0000035178,0.05000,6.307
4,self_attn.q_proj,0.0000402571,0.05000,6.356
4,self_attn.o_proj,0.0000001683,0.05000,1.296
4,mlp.gate_proj,0.0000267916,0.05000,3.001
4,mlp.up_proj,0.0000176318,0.05000,3.032
4,mlp.down_proj,0.0000004458,0.05000,3.538
5,self_attn.q_proj,0.0000566906,0.05000,6.143
5,self_attn.k_proj,0.0000350725,0.05000,6.192
5,self_attn.v_proj,0.0000035867,0.05000,6.259
5,self_attn.o_proj,0.0000002498,0.05000,1.254
5,mlp.up_proj,0.0000200849,0.05000,2.861
5,mlp.gate_proj,0.0000287105,0.05000,2.907
5,mlp.down_proj,0.0000006270,0.05000,3.494
6,self_attn.q_proj,0.0000456653,0.05000,6.164
6,self_attn.k_proj,0.0000243565,0.05000,6.213
6,self_attn.v_proj,0.0000032720,0.05000,6.258
6,self_attn.o_proj,0.0000002966,0.05000,1.271
6,mlp.gate_proj,0.0000316156,0.05000,2.851
6,mlp.up_proj,0.0000221315,0.05000,2.875
6,mlp.down_proj,0.0000007019,0.05000,3.596
7,self_attn.k_proj,0.0000232553,0.05000,6.241
7,self_attn.q_proj,0.0000390834,0.05000,6.349
7,self_attn.v_proj,0.0000030140,0.05000,6.364
7,self_attn.o_proj,0.0000005020,0.05000,1.230
7,mlp.gate_proj,0.0000300552,0.05000,2.820
7,mlp.up_proj,0.0000227827,0.05000,2.857
7,mlp.down_proj,0.0000007818,0.05000,3.548
8,self_attn.k_proj,0.0000296915,0.05000,6.160
8,self_attn.q_proj,0.0000489327,0.05000,6.220
8,self_attn.v_proj,0.0000035765,0.05000,6.250
8,self_attn.o_proj,0.0000005180,0.05000,1.287
8,mlp.gate_proj,0.0000325682,0.05000,3.116
8,mlp.up_proj,0.0000241525,0.05000,3.147
8,mlp.down_proj,0.0000007777,0.05000,3.697
9,self_attn.k_proj,0.0000279815,0.05000,6.365
9,self_attn.q_proj,0.0000470294,0.05000,6.415
9,self_attn.v_proj,0.0000045795,0.05000,6.444
9,self_attn.o_proj,0.0000005254,0.05000,1.274
9,mlp.gate_proj,0.0000318904,0.05000,3.054
9,mlp.up_proj,0.0000243400,0.05000,3.097
9,mlp.down_proj,0.0000007917,0.05000,3.662
10,self_attn.v_proj,0.0000035862,0.05000,6.390
10,self_attn.k_proj,0.0000299603,0.05000,6.439
10,self_attn.q_proj,0.0000476768,0.05000,6.472
10,self_attn.o_proj,0.0000006423,0.05000,1.260
10,mlp.gate_proj,0.0000323488,0.05000,3.004
10,mlp.up_proj,0.0000264711,0.05000,3.029
10,mlp.down_proj,0.0000009133,0.05000,3.569
11,self_attn.v_proj,0.0000042222,0.05000,6.385
11,self_attn.q_proj,0.0000389616,0.05000,6.432
11,self_attn.k_proj,0.0000221590,0.05000,6.478
11,self_attn.o_proj,0.0000007108,0.05000,1.303
11,mlp.gate_proj,0.0000346920,0.05000,3.147
11,mlp.up_proj,0.0000293260,0.05000,3.189
11,mlp.down_proj,0.0000010680,0.05000,3.578
12,self_attn.v_proj,0.0000049074,0.05000,6.397
12,self_attn.q_proj,0.0000599314,0.05000,6.425
12,self_attn.k_proj,0.0000360839,0.05000,6.448
12,self_attn.o_proj,0.0000008251,0.05000,1.306
12,mlp.up_proj,0.0000314120,0.05000,3.121
12,mlp.gate_proj,0.0000372577,0.05000,3.155
12,mlp.down_proj,0.0000012126,0.05000,3.640
13,self_attn.k_proj,0.0000389678,0.05000,6.532
13,self_attn.q_proj,0.0000606843,0.05000,6.581
13,self_attn.v_proj,0.0000056146,0.05000,6.603
13,self_attn.o_proj,0.0000009978,0.05000,1.232
13,mlp.gate_proj,0.0000445419,0.05000,3.054
13,mlp.up_proj,0.0000351055,0.05000,3.085
13,mlp.down_proj,0.0000015760,0.05000,3.608
14,self_attn.q_proj,0.0000686343,0.05000,6.346
14,self_attn.k_proj,0.0000314004,0.05000,6.429
14,self_attn.v_proj,0.0000066387,0.05000,6.455
14,self_attn.o_proj,0.0000012561,0.05000,1.325
14,mlp.up_proj,0.0000375102,0.05000,2.993
14,mlp.gate_proj,0.0000478461,0.05000,3.019
14,mlp.down_proj,0.0000019180,0.05000,3.639
15,self_attn.v_proj,0.0000065177,0.05000,6.382
15,self_attn.q_proj,0.0000684232,0.05000,6.478
15,self_attn.k_proj,0.0000357806,0.05000,6.490
15,self_attn.o_proj,0.0000009062,0.05000,1.253
15,mlp.up_proj,0.0000374816,0.05000,3.019
15,mlp.gate_proj,0.0000510690,0.05000,3.046
15,mlp.down_proj,0.0000019322,0.05000,3.550
16,self_attn.q_proj,0.0000697884,0.05000,6.246
16,self_attn.v_proj,0.0000073443,0.05000,6.299
16,self_attn.k_proj,0.0000398958,0.05000,6.347
16,self_attn.o_proj,0.0000005650,0.05000,1.274
16,mlp.up_proj,0.0000372290,0.05000,3.057
16,mlp.gate_proj,0.0000518827,0.05000,3.110
16,mlp.down_proj,0.0000018123,0.05000,3.574
17,self_attn.v_proj,0.0000071509,0.05000,6.345
17,self_attn.k_proj,0.0000360828,0.05000,6.390
17,self_attn.q_proj,0.0000662234,0.05000,6.422
17,self_attn.o_proj,0.0000005764,0.05000,1.277
17,mlp.gate_proj,0.0000540872,0.05000,3.071
17,mlp.up_proj,0.0000382789,0.05000,3.121
17,mlp.down_proj,0.0000019723,0.05000,3.628
18,self_attn.q_proj,0.0000768960,0.05000,6.232
18,self_attn.v_proj,0.0000090087,0.05000,6.312
18,self_attn.k_proj,0.0000426919,0.05000,6.348
18,self_attn.o_proj,0.0000005129,0.05000,1.306
18,mlp.up_proj,0.0000403784,0.05000,3.084
18,mlp.gate_proj,0.0000557208,0.05000,3.102
18,mlp.down_proj,0.0000020153,0.05000,3.576
19,self_attn.k_proj,0.0000391191,0.05000,6.414
19,self_attn.q_proj,0.0000672697,0.05000,6.462
19,self_attn.v_proj,0.0000086852,0.05000,6.490
19,self_attn.o_proj,0.0000007129,0.05000,1.348
19,mlp.up_proj,0.0000428687,0.05000,3.091
19,mlp.gate_proj,0.0000584434,0.05000,3.129
19,mlp.down_proj,0.0000024692,0.05000,3.558
20,self_attn.v_proj,0.0000102000,0.05000,6.572
20,self_attn.k_proj,0.0000419316,0.05000,6.646
20,self_attn.q_proj,0.0000693657,0.05000,6.679
20,self_attn.o_proj,0.0000005730,0.05000,1.440
20,mlp.up_proj,0.0000435856,0.05000,3.067
20,mlp.gate_proj,0.0000564082,0.05000,3.126
20,mlp.down_proj,0.0000023884,0.05000,3.670
21,self_attn.k_proj,0.0000410597,0.05000,6.395
21,self_attn.v_proj,0.0000134331,0.05000,6.524
21,self_attn.q_proj,0.0000694150,0.05000,6.547
21,self_attn.o_proj,0.0000006532,0.05000,1.324
21,mlp.gate_proj,0.0000595703,0.05000,3.093
21,mlp.up_proj,0.0000455969,0.05000,3.126
21,mlp.down_proj,0.0000025119,0.05000,3.593
22,self_attn.k_proj,0.0000379314,0.05000,6.376
22,self_attn.q_proj,0.0000656641,0.05000,6.415
22,self_attn.v_proj,0.0000129559,0.05000,6.437
22,self_attn.o_proj,0.0000006026,0.05000,1.297
22,mlp.up_proj,0.0000487634,0.05000,3.087
22,mlp.gate_proj,0.0000640544,0.05000,3.115
22,mlp.down_proj,0.0000028967,0.05000,3.652
23,self_attn.q_proj,0.0000680379,0.05000,6.571
23,self_attn.v_proj,0.0000125475,0.05000,6.652
23,self_attn.k_proj,0.0000421066,0.05000,6.674
23,self_attn.o_proj,0.0000008360,0.05000,1.300
23,mlp.gate_proj,0.0000771958,0.05000,3.051
23,mlp.up_proj,0.0000553964,0.05000,3.084
23,mlp.down_proj,0.0000037663,0.05000,3.648
24,self_attn.q_proj,0.0000738199,0.05000,6.379
24,self_attn.v_proj,0.0000189734,0.05000,6.423
24,self_attn.k_proj,0.0000465050,0.05000,6.477
24,self_attn.o_proj,0.0000014266,0.05000,1.281
24,mlp.up_proj,0.0000596369,0.05000,3.043
24,mlp.gate_proj,0.0000848039,0.05000,3.081
24,mlp.down_proj,0.0000043019,0.05000,3.599
25,self_attn.k_proj,0.0000343078,0.05000,6.722
25,self_attn.q_proj,0.0000664509,0.05000,6.772
25,self_attn.v_proj,0.0000153221,0.05000,6.806
25,self_attn.o_proj,0.0000016806,0.05000,1.262
25,mlp.gate_proj,0.0000898007,0.05000,3.004
25,mlp.up_proj,0.0000626860,0.05000,3.051
25,mlp.down_proj,0.0000053409,0.05000,3.695
26,self_attn.v_proj,0.0000210691,0.05000,6.411
26,self_attn.q_proj,0.0000636958,0.05000,6.472
26,self_attn.k_proj,0.0000403479,0.05000,6.524
26,self_attn.o_proj,0.0000014542,0.05000,1.290
26,mlp.up_proj,0.0000650641,0.05000,3.043
26,mlp.gate_proj,0.0000945761,0.05000,3.087
26,mlp.down_proj,0.0000081644,0.05000,3.669
27,self_attn.v_proj,0.0000141159,0.05000,6.830
27,self_attn.k_proj,0.0000273797,0.05000,6.913
27,self_attn.q_proj,0.0000486121,0.05000,6.940
27,self_attn.o_proj,0.0000070236,0.05000,1.345
27,mlp.gate_proj,0.0000950958,0.05000,3.230
27,mlp.up_proj,0.0000753671,0.05000,3.255
27,mlp.down_proj,0.0000416984,0.05000,3.798
|