|
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
Loading checkpoint shards: 14%|ββ | 1/7 [00:11<01:09, 11.62s/it]
Loading checkpoint shards: 29%|βββ | 2/7 [00:23<00:58, 11.65s/it]
Loading checkpoint shards: 43%|βββββ | 3/7 [00:34<00:46, 11.67s/it]
Loading checkpoint shards: 57%|ββββββ | 4/7 [00:46<00:34, 11.64s/it]
Loading checkpoint shards: 71%|ββββββββ | 5/7 [00:58<00:23, 11.68s/it]
Loading checkpoint shards: 86%|βββββββββ | 6/7 [01:09<00:11, 11.64s/it]
Loading checkpoint shards: 100%|ββββββββββ| 7/7 [01:16<00:00, 10.09s/it]
Loading checkpoint shards: 100%|ββββββββββ| 7/7 [01:16<00:00, 10.97s/it] |
| Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-6fbe877195f42de5/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
| Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-efc3d4f4606f44bd/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
| Starting ... |
| Ready. |
| 0 self_attn.k_proj |
| Quantizing ... |
| time 5.59 |
| error 248.51150512695312 |
| 0 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 23.832942962646484 |
| 0 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 208.9890899658203 |
| 0 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 1.2797350883483887 |
| 0 mlp.up_proj |
| Quantizing ... |
| time 5.50 |
| error 85.32704162597656 |
| 0 mlp.gate_proj |
| Quantizing ... |
| time 4.67 |
| error 92.8025131225586 |
| 0 mlp.down_proj |
| Quantizing ... |
| time 15.19 |
| error 7.90690803527832 |
| 1 self_attn.k_proj |
| Quantizing ... |
| time 5.53 |
| error 491.87091064453125 |
| 1 self_attn.v_proj |
| Quantizing ... |
| time 4.61 |
| error 65.71781158447266 |
| 1 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 481.22857666015625 |
| 1 self_attn.o_proj |
| Quantizing ... |
| time 5.37 |
| error 14.926952362060547 |
| 1 mlp.up_proj |
| Quantizing ... |
| time 5.54 |
| error 569.1054077148438 |
| 1 mlp.gate_proj |
| Quantizing ... |
| time 4.69 |
| error 651.648193359375 |
| 1 mlp.down_proj |
| Quantizing ... |
| time 15.20 |
| error 52.38072204589844 |
| 2 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 1702.48681640625 |
| 2 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 328.9317321777344 |
| 2 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 1640.189453125 |
| 2 self_attn.o_proj |
| Quantizing ... |
| time 5.39 |
| error 32.7197380065918 |
| 2 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 1899.3172607421875 |
| 2 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 2259.476806640625 |
| 2 mlp.down_proj |
| Quantizing ... |
| time 15.23 |
| error 115.92791748046875 |
| 3 self_attn.k_proj |
| Quantizing ... |
| time 5.52 |
| error 3070.20654296875 |
| 3 self_attn.v_proj |
| Quantizing ... |
| time 4.61 |
| error 640.16943359375 |
| 3 self_attn.q_proj |
| Quantizing ... |
| time 4.62 |
| error 2804.199462890625 |
| 3 self_attn.o_proj |
| Quantizing ... |
| time 5.36 |
| error 56.04383850097656 |
| 3 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 3143.75732421875 |
| 3 mlp.gate_proj |
| Quantizing ... |
| time 4.70 |
| error 3672.450439453125 |
| 3 mlp.down_proj |
| Quantizing ... |
| time 15.21 |
| error 455.0036315917969 |
| 4 self_attn.k_proj |
| Quantizing ... |
| time 5.52 |
| error 8661.1416015625 |
| 4 self_attn.v_proj |
| Quantizing ... |
| time 4.60 |
| error 3130.848388671875 |
| 4 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 8500.8896484375 |
| 4 self_attn.o_proj |
| Quantizing ... |
| time 5.36 |
| error 73.76594543457031 |
| 4 mlp.up_proj |
| Quantizing ... |
| time 5.55 |
| error 4750.201171875 |
| 4 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 5532.53076171875 |
| 4 mlp.down_proj |
| Quantizing ... |
| time 15.24 |
| error 274.5998229980469 |
| 5 self_attn.k_proj |
| Quantizing ... |
| time 5.53 |
| error 9668.17578125 |
| 5 self_attn.v_proj |
| Quantizing ... |
| time 4.60 |
| error 3808.4091796875 |
| 5 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 9597.880859375 |
| 5 self_attn.o_proj |
| Quantizing ... |
| time 5.39 |
| error 96.71366882324219 |
| 5 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 5799.3759765625 |
| 5 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 6807.18798828125 |
| 5 mlp.down_proj |
| Quantizing ... |
| time 15.21 |
| error 389.222900390625 |
| 6 self_attn.k_proj |
| Quantizing ... |
| time 5.52 |
| error 10976.859375 |
| 6 self_attn.v_proj |
| Quantizing ... |
| time 4.63 |
| error 4781.712890625 |
| 6 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 11052.400390625 |
| 6 self_attn.o_proj |
| Quantizing ... |
| time 5.36 |
| error 126.38148498535156 |
| 6 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 7372.54296875 |
| 6 mlp.gate_proj |
| Quantizing ... |
| time 4.69 |
| error 8426.8515625 |
| 6 mlp.down_proj |
| Quantizing ... |
| time 15.25 |
| error 516.6248779296875 |
| 7 self_attn.k_proj |
| Quantizing ... |
| time 5.53 |
| error 12607.513671875 |
| 7 self_attn.v_proj |
| Quantizing ... |
| time 4.60 |
| error 5585.8876953125 |
| 7 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 12423.859375 |
| 7 self_attn.o_proj |
| Quantizing ... |
| time 5.37 |
| error 216.31124877929688 |
| 7 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 8981.8994140625 |
| 7 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 10161.09375 |
| 7 mlp.down_proj |
| Quantizing ... |
| time 15.23 |
| error 673.1290283203125 |
| 8 self_attn.k_proj |
| Quantizing ... |
| time 5.52 |
| error 13563.486328125 |
| 8 self_attn.v_proj |
| Quantizing ... |
| time 4.61 |
| error 6292.990234375 |
| 8 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 13392.42578125 |
| 8 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 296.80194091796875 |
| 8 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 10729.47265625 |
| 8 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 12080.126953125 |
| 8 mlp.down_proj |
| Quantizing ... |
| time 15.19 |
| error 850.3040161132812 |
| 9 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 13440.3017578125 |
| 9 self_attn.v_proj |
| Quantizing ... |
| time 4.62 |
| error 6355.3544921875 |
| 9 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 12908.4482421875 |
| 9 self_attn.o_proj |
| Quantizing ... |
| time 5.34 |
| error 356.29302978515625 |
| 9 mlp.up_proj |
| Quantizing ... |
| time 5.52 |
| error 12409.3623046875 |
| 9 mlp.gate_proj |
| Quantizing ... |
| time 4.69 |
| error 13892.767578125 |
| 9 mlp.down_proj |
| Quantizing ... |
| time 15.18 |
| error 1032.557861328125 |
| 10 self_attn.k_proj |
| Quantizing ... |
| time 5.19 |
| error 15417.884765625 |
| 10 self_attn.v_proj |
| Quantizing ... |
| time 4.50 |
| error 7487.8876953125 |
| 10 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 14854.5859375 |
| 10 self_attn.o_proj |
| Quantizing ... |
| time 4.65 |
| error 522.43212890625 |
| 10 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 13540.619140625 |
| 10 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 14910.919921875 |
| 10 mlp.down_proj |
| Quantizing ... |
| time 15.21 |
| error 1451.3995361328125 |
| 11 self_attn.k_proj |
| Quantizing ... |
| time 5.45 |
| error 19503.8125 |
| 11 self_attn.v_proj |
| Quantizing ... |
| time 4.54 |
| error 10578.271484375 |
| 11 self_attn.q_proj |
| Quantizing ... |
| time 4.53 |
| error 19087.310546875 |
| 11 self_attn.o_proj |
| Quantizing ... |
| time 5.29 |
| error 460.1400146484375 |
| 11 mlp.up_proj |
| Quantizing ... |
| time 5.46 |
| error 15259.3271484375 |
| 11 mlp.gate_proj |
| Quantizing ... |
| time 4.62 |
| error 16558.69921875 |
| 11 mlp.down_proj |
| Quantizing ... |
| time 15.07 |
| error 1355.914794921875 |
| 12 self_attn.k_proj |
| Quantizing ... |
| time 5.46 |
| error 19905.421875 |
| 12 self_attn.v_proj |
| Quantizing ... |
| time 4.53 |
| error 10467.2255859375 |
| 12 self_attn.q_proj |
| Quantizing ... |
| time 4.52 |
| error 19042.11328125 |
| 12 self_attn.o_proj |
| Quantizing ... |
| time 5.30 |
| error 554.3314208984375 |
| 12 mlp.up_proj |
| Quantizing ... |
| time 5.46 |
| error 16581.44921875 |
| 12 mlp.gate_proj |
| Quantizing ... |
| time 4.63 |
| error 17876.6640625 |
| 12 mlp.down_proj |
| Quantizing ... |
| time 14.99 |
| error 1520.2266845703125 |
| 13 self_attn.k_proj |
| Quantizing ... |
| time 5.40 |
| error 18666.96875 |
| 13 self_attn.v_proj |
| Quantizing ... |
| time 4.50 |
| error 9848.2939453125 |
| 13 self_attn.q_proj |
| Quantizing ... |
| time 4.48 |
| error 17859.89453125 |
| 13 self_attn.o_proj |
| Quantizing ... |
| time 5.27 |
| error 736.5158081054688 |
| 13 mlp.up_proj |
| Quantizing ... |
| time 5.44 |
| error 17094.962890625 |
| 13 mlp.gate_proj |
| Quantizing ... |
| time 4.60 |
| error 17981.009765625 |
| 13 mlp.down_proj |
| Quantizing ... |
| time 14.99 |
| error 1649.564697265625 |
| 14 self_attn.k_proj |
| Quantizing ... |
| time 5.43 |
| error 19464.08984375 |
| 14 self_attn.v_proj |
| Quantizing ... |
| time 4.51 |
| error 10684.158203125 |
| 14 self_attn.q_proj |
| Quantizing ... |
| time 4.51 |
| error 18777.158203125 |
| 14 self_attn.o_proj |
| Quantizing ... |
| time 5.27 |
| error 666.4788818359375 |
| 14 mlp.up_proj |
| Quantizing ... |
| time 5.43 |
| error 18003.3359375 |
| 14 mlp.gate_proj |
| Quantizing ... |
| time 4.58 |
| error 18659.9375 |
| 14 mlp.down_proj |
| Quantizing ... |
| time 14.99 |
| error 1756.21533203125 |
| 15 self_attn.k_proj |
| Quantizing ... |
| time 5.42 |
| error 22731.232421875 |
| 15 self_attn.v_proj |
| Quantizing ... |
| time 4.53 |
| error 13153.8212890625 |
| 15 self_attn.q_proj |
| Quantizing ... |
| time 4.56 |
| error 22046.060546875 |
| 15 self_attn.o_proj |
| Quantizing ... |
| time 5.33 |
| error 927.9457397460938 |
| 15 mlp.up_proj |
| Quantizing ... |
| time 5.49 |
| error 18842.2421875 |
| 15 mlp.gate_proj |
| Quantizing ... |
| time 4.64 |
| error 19572.6171875 |
| 15 mlp.down_proj |
| Quantizing ... |
| time 15.11 |
| error 2022.401123046875 |
| 16 self_attn.k_proj |
| Quantizing ... |
| time 5.48 |
| error 23348.453125 |
| 16 self_attn.v_proj |
| Quantizing ... |
| time 4.56 |
| error 14359.458984375 |
| 16 self_attn.q_proj |
| Quantizing ... |
| time 4.56 |
| error 22662.978515625 |
| 16 self_attn.o_proj |
| Quantizing ... |
| time 5.32 |
| error 903.6290893554688 |
| 16 mlp.up_proj |
| Quantizing ... |
| time 5.49 |
| error 20053.740234375 |
| 16 mlp.gate_proj |
| Quantizing ... |
| time 4.64 |
| error 20500.84765625 |
| 16 mlp.down_proj |
| Quantizing ... |
| time 15.12 |
| error 2178.75537109375 |
| 17 self_attn.k_proj |
| Quantizing ... |
| time 5.48 |
| error 22102.595703125 |
| 17 self_attn.v_proj |
| Quantizing ... |
| time 4.56 |
| error 13796.431640625 |
| 17 self_attn.q_proj |
| Quantizing ... |
| time 4.56 |
| error 21351.3203125 |
| 17 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 914.3590087890625 |
| 17 mlp.up_proj |
| Quantizing ... |
| time 5.50 |
| error 21227.931640625 |
| 17 mlp.gate_proj |
| Quantizing ... |
| time 4.66 |
| error 21392.0234375 |
| 17 mlp.down_proj |
| Quantizing ... |
| time 15.16 |
| error 2325.583740234375 |
| 18 self_attn.k_proj |
| Quantizing ... |
| time 5.49 |
| error 23322.21484375 |
| 18 self_attn.v_proj |
| Quantizing ... |
| time 4.58 |
| error 14129.474609375 |
| 18 self_attn.q_proj |
| Quantizing ... |
| time 4.56 |
| error 22335.5390625 |
| 18 self_attn.o_proj |
| Quantizing ... |
| time 5.32 |
| error 1253.7630615234375 |
| 18 mlp.up_proj |
| Quantizing ... |
| time 5.51 |
| error 22060.33203125 |
| 18 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 22286.359375 |
| 18 mlp.down_proj |
| Quantizing ... |
| time 15.18 |
| error 2743.5048828125 |
| 19 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 22394.88671875 |
| 19 self_attn.v_proj |
| Quantizing ... |
| time 4.58 |
| error 14984.123046875 |
| 19 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 21649.625 |
| 19 self_attn.o_proj |
| Quantizing ... |
| time 5.36 |
| error 1130.92822265625 |
| 19 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 23244.22265625 |
| 19 mlp.gate_proj |
| Quantizing ... |
| time 4.69 |
| error 23601.015625 |
| 19 mlp.down_proj |
| Quantizing ... |
| time 15.26 |
| error 3028.933349609375 |
| 20 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 22318.123046875 |
| 20 self_attn.v_proj |
| Quantizing ... |
| time 4.60 |
| error 15997.8583984375 |
| 20 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 21778.65234375 |
| 20 self_attn.o_proj |
| Quantizing ... |
| time 5.36 |
| error 1066.665283203125 |
| 20 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 25091.755859375 |
| 20 mlp.gate_proj |
| Quantizing ... |
| time 4.69 |
| error 25606.6796875 |
| 20 mlp.down_proj |
| Quantizing ... |
| time 15.15 |
| error 3200.33935546875 |
| 21 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 22088.099609375 |
| 21 self_attn.v_proj |
| Quantizing ... |
| time 4.45 |
| error 15016.916015625 |
| 21 self_attn.q_proj |
| Quantizing ... |
| time 3.92 |
| error 21347.455078125 |
| 21 self_attn.o_proj |
| Quantizing ... |
| time 4.65 |
| error 1443.336669921875 |
| 21 mlp.up_proj |
| Quantizing ... |
| time 4.79 |
| error 25449.8359375 |
| 21 mlp.gate_proj |
| Quantizing ... |
| time 3.96 |
| error 25958.3515625 |
| 21 mlp.down_proj |
| Quantizing ... |
| time 13.25 |
| error 3697.61669921875 |
| 22 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 17819.23046875 |
| 22 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 14577.216796875 |
| 22 self_attn.q_proj |
| Quantizing ... |
| time 4.57 |
| error 17515.81640625 |
| 22 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 1200.3115234375 |
| 22 mlp.up_proj |
| Quantizing ... |
| time 5.52 |
| error 27206.17578125 |
| 22 mlp.gate_proj |
| Quantizing ... |
| time 4.66 |
| error 27972.80859375 |
| 22 mlp.down_proj |
| Quantizing ... |
| time 15.22 |
| error 4049.857177734375 |
| 23 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 22719.404296875 |
| 23 self_attn.v_proj |
| Quantizing ... |
| time 4.58 |
| error 17897.3828125 |
| 23 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 22223.16015625 |
| 23 self_attn.o_proj |
| Quantizing ... |
| time 5.34 |
| error 1186.746826171875 |
| 23 mlp.up_proj |
| Quantizing ... |
| time 5.51 |
| error 28716.908203125 |
| 23 mlp.gate_proj |
| Quantizing ... |
| time 4.67 |
| error 29901.70703125 |
| 23 mlp.down_proj |
| Quantizing ... |
| time 15.15 |
| error 4423.9609375 |
| 24 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 21154.1796875 |
| 24 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 17285.16015625 |
| 24 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 20868.7890625 |
| 24 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 1467.7769775390625 |
| 24 mlp.up_proj |
| Quantizing ... |
| time 5.48 |
| error 29462.51953125 |
| 24 mlp.gate_proj |
| Quantizing ... |
| time 4.66 |
| error 30764.58984375 |
| 24 mlp.down_proj |
| Quantizing ... |
| time 15.18 |
| error 4902.71728515625 |
| 25 self_attn.k_proj |
| Quantizing ... |
| time 5.49 |
| error 22048.046875 |
| 25 self_attn.v_proj |
| Quantizing ... |
| time 4.57 |
| error 16228.04296875 |
| 25 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 21143.0 |
| 25 self_attn.o_proj |
| Quantizing ... |
| time 5.34 |
| error 2139.94775390625 |
| 25 mlp.up_proj |
| Quantizing ... |
| time 5.52 |
| error 30151.56640625 |
| 25 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 32173.623046875 |
| 25 mlp.down_proj |
| Quantizing ... |
| time 15.19 |
| error 5837.19970703125 |
| 26 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 23134.4375 |
| 26 self_attn.v_proj |
| Quantizing ... |
| time 4.58 |
| error 20564.1015625 |
| 26 self_attn.q_proj |
| Quantizing ... |
| time 4.57 |
| error 22738.328125 |
| 26 self_attn.o_proj |
| Quantizing ... |
| time 5.32 |
| error 1489.8433837890625 |
| 26 mlp.up_proj |
| Quantizing ... |
| time 5.52 |
| error 32375.87890625 |
| 26 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 35263.8046875 |
| 26 mlp.down_proj |
| Quantizing ... |
| time 15.16 |
| error 6332.103515625 |
| 27 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 18952.2265625 |
| 27 self_attn.v_proj |
| Quantizing ... |
| time 4.58 |
| error 17621.71875 |
| 27 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 18678.576171875 |
| 27 self_attn.o_proj |
| Quantizing ... |
| time 5.33 |
| error 948.7327880859375 |
| 27 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 34296.54296875 |
| 27 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 38023.46875 |
| 27 mlp.down_proj |
| Quantizing ... |
| time 15.14 |
| error 6713.2685546875 |
| 28 self_attn.k_proj |
| Quantizing ... |
| time 5.44 |
| error 21827.68359375 |
| 28 self_attn.v_proj |
| Quantizing ... |
| time 4.51 |
| error 21026.181640625 |
| 28 self_attn.q_proj |
| Quantizing ... |
| time 4.52 |
| error 21629.984375 |
| 28 self_attn.o_proj |
| Quantizing ... |
| time 5.28 |
| error 1200.962158203125 |
| 28 mlp.up_proj |
| Quantizing ... |
| time 4.73 |
| error 35941.671875 |
| 28 mlp.gate_proj |
| Quantizing ... |
| time 3.89 |
| error 40337.671875 |
| 28 mlp.down_proj |
| Quantizing ... |
| time 13.09 |
| error 7115.2939453125 |
| 29 self_attn.k_proj |
| Quantizing ... |
| time 4.71 |
| error 18534.94921875 |
| 29 self_attn.v_proj |
| Quantizing ... |
| time 3.81 |
| error 18983.3828125 |
| 29 self_attn.q_proj |
| Quantizing ... |
| time 3.79 |
| error 18497.5078125 |
| 29 self_attn.o_proj |
| Quantizing ... |
| time 4.56 |
| error 1055.707763671875 |
| 29 mlp.up_proj |
| Quantizing ... |
| time 4.73 |
| error 37311.1328125 |
| 29 mlp.gate_proj |
| Quantizing ... |
| time 3.89 |
| error 42287.8046875 |
| 29 mlp.down_proj |
| Quantizing ... |
| time 13.09 |
| error 7297.30322265625 |
| 30 self_attn.k_proj |
| Quantizing ... |
| time 4.73 |
| error 19803.091796875 |
| 30 self_attn.v_proj |
| Quantizing ... |
| time 3.82 |
| error 19785.2578125 |
| 30 self_attn.q_proj |
| Quantizing ... |
| time 3.79 |
| error 19760.001953125 |
| 30 self_attn.o_proj |
| Quantizing ... |
| time 4.54 |
| error 1469.172119140625 |
| 30 mlp.up_proj |
| Quantizing ... |
| time 4.73 |
| error 38499.63671875 |
| 30 mlp.gate_proj |
| Quantizing ... |
| time 3.89 |
| error 44179.5 |
| 30 mlp.down_proj |
| Quantizing ... |
| time 13.09 |
| error 7580.81689453125 |
| 31 self_attn.k_proj |
| Quantizing ... |
| time 4.73 |
| error 17820.888671875 |
| 31 self_attn.v_proj |
| Quantizing ... |
| time 3.81 |
| error 18094.15234375 |
| 31 self_attn.q_proj |
| Quantizing ... |
| time 3.79 |
| error 17839.12890625 |
| 31 self_attn.o_proj |
| Quantizing ... |
| time 4.55 |
| error 865.2473754882812 |
| 31 mlp.up_proj |
| Quantizing ... |
| time 4.73 |
| error 39930.4765625 |
| 31 mlp.gate_proj |
| Quantizing ... |
| time 3.90 |
| error 46099.22265625 |
| 31 mlp.down_proj |
| Quantizing ... |
| time 13.07 |
| error 7856.654296875 |
| 32 self_attn.k_proj |
| Quantizing ... |
| time 4.72 |
| error 17320.09375 |
| 32 self_attn.v_proj |
| Quantizing ... |
| time 3.81 |
| error 18277.00390625 |
| 32 self_attn.q_proj |
| Quantizing ... |
| time 3.78 |
| error 17301.69140625 |
| 32 self_attn.o_proj |
| Quantizing ... |
| time 4.55 |
| error 1483.154052734375 |
| 32 mlp.up_proj |
| Quantizing ... |
| time 4.73 |
| error 40772.7578125 |
| 32 mlp.gate_proj |
| Quantizing ... |
| time 3.91 |
| error 47268.8828125 |
| 32 mlp.down_proj |
| Quantizing ... |
| time 15.05 |
| error 8160.94189453125 |
| 33 self_attn.k_proj |
| Quantizing ... |
| time 5.43 |
| error 21955.51953125 |
| 33 self_attn.v_proj |
| Quantizing ... |
| time 4.52 |
| error 21049.22265625 |
| 33 self_attn.q_proj |
| Quantizing ... |
| time 4.51 |
| error 21577.25390625 |
| 33 self_attn.o_proj |
| Quantizing ... |
| time 5.28 |
| error 1723.208251953125 |
| 33 mlp.up_proj |
| Quantizing ... |
| time 5.45 |
| error 41624.625 |
| 33 mlp.gate_proj |
| Quantizing ... |
| time 4.62 |
| error 48574.9140625 |
| 33 mlp.down_proj |
| Quantizing ... |
| time 15.02 |
| error 8516.169921875 |
| 34 self_attn.k_proj |
| Quantizing ... |
| time 5.45 |
| error 21276.6171875 |
| 34 self_attn.v_proj |
| Quantizing ... |
| time 4.53 |
| error 21605.98828125 |
| 34 self_attn.q_proj |
| Quantizing ... |
| time 4.52 |
| error 21282.73828125 |
| 34 self_attn.o_proj |
| Quantizing ... |
| time 5.29 |
| error 1147.412109375 |
| 34 mlp.up_proj |
| Quantizing ... |
| time 5.47 |
| error 43007.8515625 |
| 34 mlp.gate_proj |
| Quantizing ... |
| time 4.62 |
| error 50336.4453125 |
| 34 mlp.down_proj |
| Quantizing ... |
| time 15.03 |
| error 8801.716796875 |
| 35 self_attn.k_proj |
| Quantizing ... |
| time 5.42 |
| error 19654.30078125 |
| 35 self_attn.v_proj |
| Quantizing ... |
| time 4.53 |
| error 20460.298828125 |
| 35 self_attn.q_proj |
| Quantizing ... |
| time 4.51 |
| error 19565.66015625 |
| 35 self_attn.o_proj |
| Quantizing ... |
| time 5.28 |
| error 1267.8861083984375 |
| 35 mlp.up_proj |
| Quantizing ... |
| time 5.44 |
| error 43844.3984375 |
| 35 mlp.gate_proj |
| Quantizing ... |
| time 4.61 |
| error 51832.9296875 |
| 35 mlp.down_proj |
| Quantizing ... |
| time 14.99 |
| error 9246.138671875 |
| 36 self_attn.k_proj |
| Quantizing ... |
| time 5.42 |
| error 21818.0546875 |
| 36 self_attn.v_proj |
| Quantizing ... |
| time 4.51 |
| error 22970.7421875 |
| 36 self_attn.q_proj |
| Quantizing ... |
| time 4.51 |
| error 21755.69140625 |
| 36 self_attn.o_proj |
| Quantizing ... |
| time 5.27 |
| error 1475.30712890625 |
| 36 mlp.up_proj |
| Quantizing ... |
| time 5.45 |
| error 45133.0 |
| 36 mlp.gate_proj |
| Quantizing ... |
| time 4.62 |
| error 53537.16796875 |
| 36 mlp.down_proj |
| Quantizing ... |
| time 15.01 |
| error 9547.609375 |
| 37 self_attn.k_proj |
| Quantizing ... |
| time 5.42 |
| error 19679.92578125 |
| 37 self_attn.v_proj |
| Quantizing ... |
| time 4.49 |
| error 21583.8515625 |
| 37 self_attn.q_proj |
| Quantizing ... |
| time 4.46 |
| error 19695.486328125 |
| 37 self_attn.o_proj |
| Quantizing ... |
| time 5.27 |
| error 1159.252197265625 |
| 37 mlp.up_proj |
| Quantizing ... |
| time 5.47 |
| error 46412.2890625 |
| 37 mlp.gate_proj |
| Quantizing ... |
| time 4.62 |
| error 54849.98828125 |
| 37 mlp.down_proj |
| Quantizing ... |
| time 15.02 |
| error 9819.6611328125 |
| 38 self_attn.k_proj |
| Quantizing ... |
| time 5.44 |
| error 18576.4296875 |
| 38 self_attn.v_proj |
| Quantizing ... |
| time 4.53 |
| error 22137.95703125 |
| 38 self_attn.q_proj |
| Quantizing ... |
| time 4.50 |
| error 18715.9453125 |
| 38 self_attn.o_proj |
| Quantizing ... |
| time 5.27 |
| error 1157.175537109375 |
| 38 mlp.up_proj |
| Quantizing ... |
| time 5.47 |
| error 47867.8359375 |
| 38 mlp.gate_proj |
| Quantizing ... |
| time 4.63 |
| error 56564.65625 |
| 38 mlp.down_proj |
| Quantizing ... |
| time 15.05 |
| error 10064.912109375 |
| 39 self_attn.k_proj |
| Quantizing ... |
| time 5.43 |
| error 18319.57421875 |
| 39 self_attn.v_proj |
| Quantizing ... |
| time 4.52 |
| error 21200.6953125 |
| 39 self_attn.q_proj |
| Quantizing ... |
| time 4.51 |
| error 18492.78515625 |
| 39 self_attn.o_proj |
| Quantizing ... |
| time 5.28 |
| error 871.6390380859375 |
| 39 mlp.up_proj |
| Quantizing ... |
| time 5.49 |
| error 49243.14453125 |
| 39 mlp.gate_proj |
| Quantizing ... |
| time 4.64 |
| error 57997.484375 |
| 39 mlp.down_proj |
| Quantizing ... |
| time 15.02 |
| error 10489.1953125 |
| 40 self_attn.k_proj |
| Quantizing ... |
| time 5.45 |
| error 19263.89453125 |
| 40 self_attn.v_proj |
| Quantizing ... |
| time 4.52 |
| error 23175.212890625 |
| 40 self_attn.q_proj |
| Quantizing ... |
| time 4.52 |
| error 19302.951171875 |
| 40 self_attn.o_proj |
| Quantizing ... |
| time 5.31 |
| error 1094.509033203125 |
| 40 mlp.up_proj |
| Quantizing ... |
| time 5.48 |
| error 50629.12109375 |
| 40 mlp.gate_proj |
| Quantizing ... |
| time 4.63 |
| error 59236.71875 |
| 40 mlp.down_proj |
| Quantizing ... |
| time 15.04 |
| error 10625.0810546875 |
| 41 self_attn.k_proj |
| Quantizing ... |
| time 5.45 |
| error 16903.75 |
| 41 self_attn.v_proj |
| Quantizing ... |
| time 4.52 |
| error 20939.875 |
| 41 self_attn.q_proj |
| Quantizing ... |
| time 4.52 |
| error 17243.86328125 |
| 41 self_attn.o_proj |
| Quantizing ... |
| time 5.28 |
| error 1089.82421875 |
| 41 mlp.up_proj |
| Quantizing ... |
| time 5.46 |
| error 51843.125 |
| 41 mlp.gate_proj |
| Quantizing ... |
| time 4.63 |
| error 60519.8515625 |
| 41 mlp.down_proj |
| Quantizing ... |
| time 15.00 |
| error 10861.064453125 |
| 42 self_attn.k_proj |
| Quantizing ... |
| time 5.42 |
| error 15238.4775390625 |
| 42 self_attn.v_proj |
| Quantizing ... |
| time 4.51 |
| error 18795.26171875 |
| 42 self_attn.q_proj |
| Quantizing ... |
| time 4.52 |
| error 15420.541015625 |
| 42 self_attn.o_proj |
| Quantizing ... |
| time 5.29 |
| error 784.9866943359375 |
| 42 mlp.up_proj |
| Quantizing ... |
| time 5.46 |
| error 53463.21875 |
| 42 mlp.gate_proj |
| Quantizing ... |
| time 4.62 |
| error 62206.3515625 |
| 42 mlp.down_proj |
| Quantizing ... |
| time 15.02 |
| error 11189.00390625 |
| 43 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 20108.013671875 |
| 43 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 23657.625 |
| 43 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 20269.19140625 |
| 43 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 1607.70849609375 |
| 43 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 55084.65234375 |
| 43 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 64042.7734375 |
| 43 mlp.down_proj |
| Quantizing ... |
| time 15.19 |
| error 11612.2919921875 |
| 44 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 17356.1171875 |
| 44 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 21160.546875 |
| 44 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 17603.23828125 |
| 44 self_attn.o_proj |
| Quantizing ... |
| time 5.34 |
| error 1255.604248046875 |
| 44 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 56339.96875 |
| 44 mlp.gate_proj |
| Quantizing ... |
| time 4.70 |
| error 65163.125 |
| 44 mlp.down_proj |
| Quantizing ... |
| time 15.20 |
| error 12064.7138671875 |
| 45 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 18117.7890625 |
| 45 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 21808.15625 |
| 45 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 18116.814453125 |
| 45 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 1263.494140625 |
| 45 mlp.up_proj |
| Quantizing ... |
| time 5.52 |
| error 57673.00390625 |
| 45 mlp.gate_proj |
| Quantizing ... |
| time 4.69 |
| error 66421.8984375 |
| 45 mlp.down_proj |
| Quantizing ... |
| time 15.22 |
| error 12156.66015625 |
| 46 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 14839.7373046875 |
| 46 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 18257.892578125 |
| 46 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 15213.90625 |
| 46 self_attn.o_proj |
| Quantizing ... |
| time 5.34 |
| error 785.2659912109375 |
| 46 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 58797.3125 |
| 46 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 67662.4375 |
| 46 mlp.down_proj |
| Quantizing ... |
| time 15.22 |
| error 12578.89453125 |
| 47 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 15980.6748046875 |
| 47 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 20399.40625 |
| 47 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 16314.845703125 |
| 47 self_attn.o_proj |
| Quantizing ... |
| time 5.34 |
| error 994.2510375976562 |
| 47 mlp.up_proj |
| Quantizing ... |
| time 5.55 |
| error 60194.90625 |
| 47 mlp.gate_proj |
| Quantizing ... |
| time 4.69 |
| error 69083.8046875 |
| 47 mlp.down_proj |
| Quantizing ... |
| time 15.22 |
| error 12717.556640625 |
| 48 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 16732.21484375 |
| 48 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 20331.4609375 |
| 48 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 16550.6796875 |
| 48 self_attn.o_proj |
| Quantizing ... |
| time 5.37 |
| error 830.1424560546875 |
| 48 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 61524.6640625 |
| 48 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 69989.859375 |
| 48 mlp.down_proj |
| Quantizing ... |
| time 15.25 |
| error 12867.81640625 |
| 49 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 20927.19921875 |
| 49 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 26998.1875 |
| 49 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 21068.333984375 |
| 49 self_attn.o_proj |
| Quantizing ... |
| time 5.33 |
| error 2200.43896484375 |
| 49 mlp.up_proj |
| Quantizing ... |
| time 5.52 |
| error 62553.7578125 |
| 49 mlp.gate_proj |
| Quantizing ... |
| time 4.68 |
| error 70532.5 |
| 49 mlp.down_proj |
| Quantizing ... |
| time 15.24 |
| error 13343.169921875 |
| 50 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 19778.150390625 |
| 50 self_attn.v_proj |
| Quantizing ... |
| time 4.60 |
| error 24383.1171875 |
| 50 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 19944.087890625 |
| 50 self_attn.o_proj |
| Quantizing ... |
| time 5.34 |
| error 1318.1099853515625 |
| 50 mlp.up_proj |
| Quantizing ... |
| time 5.53 |
| error 63662.0 |
| 50 mlp.gate_proj |
| Quantizing ... |
| time 4.71 |
| error 70943.359375 |
| 50 mlp.down_proj |
| Quantizing ... |
| time 15.23 |
| error 13458.142578125 |
| 51 self_attn.k_proj |
| Quantizing ... |
| time 5.51 |
| error 19395.578125 |
| 51 self_attn.v_proj |
| Quantizing ... |
| time 4.61 |
| error 23529.4296875 |
| 51 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 19515.13671875 |
| 51 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 1308.77294921875 |
| 51 mlp.up_proj |
| Quantizing ... |
| time 5.54 |
| error 64472.87890625 |
| 51 mlp.gate_proj |
| Quantizing ... |
| time 4.70 |
| error 71128.5078125 |
| 51 mlp.down_proj |
| Quantizing ... |
| time 15.24 |
| error 13829.0478515625 |
| 52 self_attn.k_proj |
| Quantizing ... |
| time 5.52 |
| error 15992.076171875 |
| 52 self_attn.v_proj |
| Quantizing ... |
| time 4.59 |
| error 19542.921875 |
| 52 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 16278.9853515625 |
| 52 self_attn.o_proj |
| Quantizing ... |
| time 5.36 |
| error 1193.5657958984375 |
| 52 mlp.up_proj |
| Quantizing ... |
| time 5.54 |
| error 64697.9375 |
| 52 mlp.gate_proj |
| Quantizing ... |
| time 4.71 |
| error 70637.9453125 |
| 52 mlp.down_proj |
| Quantizing ... |
| time 15.26 |
| error 14138.74609375 |
| 53 self_attn.k_proj |
| Quantizing ... |
| time 5.52 |
| error 15789.552734375 |
| 53 self_attn.v_proj |
| Quantizing ... |
| time 4.60 |
| error 18145.87109375 |
| 53 self_attn.q_proj |
| Quantizing ... |
| time 4.59 |
| error 15677.279296875 |
| 53 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 987.3407592773438 |
| 53 mlp.up_proj |
| Quantizing ... |
| time 5.54 |
| error 64783.2578125 |
| 53 mlp.gate_proj |
| Quantizing ... |
| time 4.70 |
| error 70183.8125 |
| 53 mlp.down_proj |
| Quantizing ... |
| time 15.15 |
| error 14546.9462890625 |
| 54 self_attn.k_proj |
| Quantizing ... |
| time 5.57 |
| error 17387.84375 |
| 54 self_attn.v_proj |
| Quantizing ... |
| time 4.57 |
| error 21607.25390625 |
| 54 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 17343.09375 |
| 54 self_attn.o_proj |
| Quantizing ... |
| time 5.33 |
| error 1196.340576171875 |
| 54 mlp.up_proj |
| Quantizing ... |
| time 5.55 |
| error 65141.66015625 |
| 54 mlp.gate_proj |
| Quantizing ... |
| time 4.72 |
| error 69816.265625 |
| 54 mlp.down_proj |
| Quantizing ... |
| time 15.27 |
| error 14795.326171875 |
| 55 self_attn.k_proj |
| Quantizing ... |
| time 5.53 |
| error 17946.92578125 |
| 55 self_attn.v_proj |
| Quantizing ... |
| time 4.62 |
| error 21772.1796875 |
| 55 self_attn.q_proj |
| Quantizing ... |
| time 4.61 |
| error 18037.7890625 |
| 55 self_attn.o_proj |
| Quantizing ... |
| time 5.37 |
| error 1498.04150390625 |
| 55 mlp.up_proj |
| Quantizing ... |
| time 5.55 |
| error 65091.9140625 |
| 55 mlp.gate_proj |
| Quantizing ... |
| time 4.71 |
| error 69162.703125 |
| 55 mlp.down_proj |
| Quantizing ... |
| time 15.27 |
| error 16282.494140625 |
| 56 self_attn.k_proj |
| Quantizing ... |
| time 5.53 |
| error 15704.654296875 |
| 56 self_attn.v_proj |
| Quantizing ... |
| time 4.62 |
| error 21285.42578125 |
| 56 self_attn.q_proj |
| Quantizing ... |
| time 4.62 |
| error 15670.8251953125 |
| 56 self_attn.o_proj |
| Quantizing ... |
| time 5.38 |
| error 2344.0654296875 |
| 56 mlp.up_proj |
| Quantizing ... |
| time 5.56 |
| error 64253.71875 |
| 56 mlp.gate_proj |
| Quantizing ... |
| time 4.72 |
| error 67705.203125 |
| 56 mlp.down_proj |
| Quantizing ... |
| time 15.32 |
| error 18808.955078125 |
| 57 self_attn.k_proj |
| Quantizing ... |
| time 5.54 |
| error 15364.15234375 |
| 57 self_attn.v_proj |
| Quantizing ... |
| time 4.62 |
| error 17928.765625 |
| 57 self_attn.q_proj |
| Quantizing ... |
| time 4.60 |
| error 15470.736328125 |
| 57 self_attn.o_proj |
| Quantizing ... |
| time 5.38 |
| error 1377.28271484375 |
| 57 mlp.up_proj |
| Quantizing ... |
| time 5.55 |
| error 61965.84375 |
| 57 mlp.gate_proj |
| Quantizing ... |
| time 4.72 |
| error 65122.2578125 |
| 57 mlp.down_proj |
| Quantizing ... |
| time 15.28 |
| error 23356.546875 |
| 58 self_attn.k_proj |
| Quantizing ... |
| time 5.50 |
| error 12949.615234375 |
| 58 self_attn.v_proj |
| Quantizing ... |
| time 4.57 |
| error 14125.650390625 |
| 58 self_attn.q_proj |
| Quantizing ... |
| time 4.58 |
| error 13172.3046875 |
| 58 self_attn.o_proj |
| Quantizing ... |
| time 5.35 |
| error 1190.738037109375 |
| 58 mlp.up_proj |
| Quantizing ... |
| time 4.92 |
| error 56147.046875 |
| 58 mlp.gate_proj |
| Quantizing ... |
| time 4.08 |
| error 60298.77734375 |
| 58 mlp.down_proj |
| Quantizing ... |
| time 13.60 |
| error 31925.26953125 |
| 59 self_attn.k_proj |
| Quantizing ... |
| time 4.89 |
| error 11584.0322265625 |
| 59 self_attn.v_proj |
| Quantizing ... |
| time 3.98 |
| error 10727.130859375 |
| 59 self_attn.q_proj |
| Quantizing ... |
| time 3.98 |
| error 11997.470703125 |
| 59 self_attn.o_proj |
| Quantizing ... |
| time 4.87 |
| error 2141.62890625 |
| 59 mlp.up_proj |
| Quantizing ... |
| time 5.01 |
| error 43433.7265625 |
| 59 mlp.gate_proj |
| Quantizing ... |
| time 4.22 |
| error 46841.99609375 |
| 59 mlp.down_proj |
| Quantizing ... |
| time 13.90 |
| error 51444.9765625 |
| 4572.00866651535 |
| Found cached dataset wikitext (/home/usbhost/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) |
| Found cached dataset wikitext (/home/usbhost/.cache/huggingface/datasets/wikitext/wikitext-2-raw-v1/1.0.0/a241db52902eaf2c6aa732210bead40c090019a499ceb13bcbfa3f8ab646a126) |
| wikitext2 |
| Evaluating ... |
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| 10 |
| 11 |
| 12 |
| 13 |
| 14 |
| 15 |
| 16 |
| 17 |
| 18 |
| 19 |
| 20 |
| 21 |
| 22 |
| 23 |
| 24 |
| 25 |
| 26 |
| 27 |
| 28 |
| 29 |
| 30 |
| 31 |
| 32 |
| 33 |
| 34 |
| 35 |
| 36 |
| 37 |
| 38 |
| 39 |
| 40 |
| 41 |
| 42 |
| 43 |
| 44 |
| 45 |
| 46 |
| 47 |
| 48 |
| 49 |
| 50 |
| 51 |
| 52 |
| 53 |
| 54 |
| 55 |
| 56 |
| 57 |
| 58 |
| 59 |
| 4.230341911315918 |
| Found cached dataset ptb_text_only (/home/usbhost/.cache/huggingface/datasets/ptb_text_only/penn_treebank/1.1.0/8d1b97746fb9765d140e569ec5ddd35e20af4d37761f5e1bf357ea0b081f2c1f) |
| Found cached dataset ptb_text_only (/home/usbhost/.cache/huggingface/datasets/ptb_text_only/penn_treebank/1.1.0/8d1b97746fb9765d140e569ec5ddd35e20af4d37761f5e1bf357ea0b081f2c1f) |
| ptb-new |
| Evaluating ... |
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| 10 |
| 11 |
| 12 |
| 13 |
| 14 |
| 15 |
| 16 |
| 17 |
| 18 |
| 19 |
| 20 |
| 21 |
| 22 |
| 23 |
| 24 |
| 25 |
| 26 |
| 27 |
| 28 |
| 29 |
| 30 |
| 31 |
| 32 |
| 33 |
| 34 |
| 35 |
| 36 |
| 37 |
| 38 |
| 39 |
| 40 |
| 41 |
| 42 |
| 43 |
| 44 |
| 45 |
| 46 |
| 47 |
| 48 |
| 49 |
| 50 |
| 51 |
| 52 |
| 53 |
| 54 |
| 55 |
| 56 |
| 57 |
| 58 |
| 59 |
| 8.243087768554688 |
| Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-6fbe877195f42de5/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
| Found cached dataset json (/home/usbhost/.cache/huggingface/datasets/allenai___json/allenai--c4-efc3d4f4606f44bd/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51) |
| c4-new |
| Evaluating ... |
| 0 |
| 1 |
| 2 |
| 3 |
| 4 |
| 5 |
| 6 |
| 7 |
| 8 |
| 9 |
| 10 |
| 11 |
| 12 |
| 13 |
| 14 |
| 15 |
| 16 |
| 17 |
| 18 |
| 19 |
| 20 |
| 21 |
| 22 |
| 23 |
| 24 |
| 25 |
| 26 |
| 27 |
| 28 |
| 29 |
| 30 |
| 31 |
| 32 |
| 33 |
| 34 |
| 35 |
| 36 |
| 37 |
| 38 |
| 39 |
| 40 |
| 41 |
| 42 |
| 43 |
| 44 |
| 45 |
| 46 |
| 47 |
| 48 |
| 49 |
| 50 |
| 51 |
| 52 |
| 53 |
| 54 |
| 55 |
| 56 |
| 57 |
| 58 |
| 59 |
| 6.231330394744873 |
| Packing ... |
| model.layers.0.self_attn.k_proj |
| model.layers.0.self_attn.o_proj |
| model.layers.0.self_attn.q_proj |
| model.layers.0.self_attn.v_proj |
| model.layers.0.mlp.down_proj |
| model.layers.0.mlp.gate_proj |
| model.layers.0.mlp.up_proj |
| model.layers.1.self_attn.k_proj |
| model.layers.1.self_attn.o_proj |
| model.layers.1.self_attn.q_proj |
| model.layers.1.self_attn.v_proj |
| model.layers.1.mlp.down_proj |
| model.layers.1.mlp.gate_proj |
| model.layers.1.mlp.up_proj |
| model.layers.2.self_attn.k_proj |
| model.layers.2.self_attn.o_proj |
| model.layers.2.self_attn.q_proj |
| model.layers.2.self_attn.v_proj |
| model.layers.2.mlp.down_proj |
| model.layers.2.mlp.gate_proj |
| model.layers.2.mlp.up_proj |
| model.layers.3.self_attn.k_proj |
| model.layers.3.self_attn.o_proj |
| model.layers.3.self_attn.q_proj |
| model.layers.3.self_attn.v_proj |
| model.layers.3.mlp.down_proj |
| model.layers.3.mlp.gate_proj |
| model.layers.3.mlp.up_proj |
| model.layers.4.self_attn.k_proj |
| model.layers.4.self_attn.o_proj |
| model.layers.4.self_attn.q_proj |
| model.layers.4.self_attn.v_proj |
| model.layers.4.mlp.down_proj |
| model.layers.4.mlp.gate_proj |
| model.layers.4.mlp.up_proj |
| model.layers.5.self_attn.k_proj |
| model.layers.5.self_attn.o_proj |
| model.layers.5.self_attn.q_proj |
| model.layers.5.self_attn.v_proj |
| model.layers.5.mlp.down_proj |
| model.layers.5.mlp.gate_proj |
| model.layers.5.mlp.up_proj |
| model.layers.6.self_attn.k_proj |
| model.layers.6.self_attn.o_proj |
| model.layers.6.self_attn.q_proj |
| model.layers.6.self_attn.v_proj |
| model.layers.6.mlp.down_proj |
| model.layers.6.mlp.gate_proj |
| model.layers.6.mlp.up_proj |
| model.layers.7.self_attn.k_proj |
| model.layers.7.self_attn.o_proj |
| model.layers.7.self_attn.q_proj |
| model.layers.7.self_attn.v_proj |
| model.layers.7.mlp.down_proj |
| model.layers.7.mlp.gate_proj |
| model.layers.7.mlp.up_proj |
| model.layers.8.self_attn.k_proj |
| model.layers.8.self_attn.o_proj |
| model.layers.8.self_attn.q_proj |
| model.layers.8.self_attn.v_proj |
| model.layers.8.mlp.down_proj |
| model.layers.8.mlp.gate_proj |
| model.layers.8.mlp.up_proj |
| model.layers.9.self_attn.k_proj |
| model.layers.9.self_attn.o_proj |
| model.layers.9.self_attn.q_proj |
| model.layers.9.self_attn.v_proj |
| model.layers.9.mlp.down_proj |
| model.layers.9.mlp.gate_proj |
| model.layers.9.mlp.up_proj |
| model.layers.10.self_attn.k_proj |
| model.layers.10.self_attn.o_proj |
| model.layers.10.self_attn.q_proj |
| model.layers.10.self_attn.v_proj |
| model.layers.10.mlp.down_proj |
| model.layers.10.mlp.gate_proj |
| model.layers.10.mlp.up_proj |
| model.layers.11.self_attn.k_proj |
| model.layers.11.self_attn.o_proj |
| model.layers.11.self_attn.q_proj |
| model.layers.11.self_attn.v_proj |
| model.layers.11.mlp.down_proj |
| model.layers.11.mlp.gate_proj |
| model.layers.11.mlp.up_proj |
| model.layers.12.self_attn.k_proj |
| model.layers.12.self_attn.o_proj |
| model.layers.12.self_attn.q_proj |
| model.layers.12.self_attn.v_proj |
| model.layers.12.mlp.down_proj |
| model.layers.12.mlp.gate_proj |
| model.layers.12.mlp.up_proj |
| model.layers.13.self_attn.k_proj |
| model.layers.13.self_attn.o_proj |
| model.layers.13.self_attn.q_proj |
| model.layers.13.self_attn.v_proj |
| model.layers.13.mlp.down_proj |
| model.layers.13.mlp.gate_proj |
| model.layers.13.mlp.up_proj |
| model.layers.14.self_attn.k_proj |
| model.layers.14.self_attn.o_proj |
| model.layers.14.self_attn.q_proj |
| model.layers.14.self_attn.v_proj |
| model.layers.14.mlp.down_proj |
| model.layers.14.mlp.gate_proj |
| model.layers.14.mlp.up_proj |
| model.layers.15.self_attn.k_proj |
| model.layers.15.self_attn.o_proj |
| model.layers.15.self_attn.q_proj |
| model.layers.15.self_attn.v_proj |
| model.layers.15.mlp.down_proj |
| model.layers.15.mlp.gate_proj |
| model.layers.15.mlp.up_proj |
| model.layers.16.self_attn.k_proj |
| model.layers.16.self_attn.o_proj |
| model.layers.16.self_attn.q_proj |
| model.layers.16.self_attn.v_proj |
| model.layers.16.mlp.down_proj |
| model.layers.16.mlp.gate_proj |
| model.layers.16.mlp.up_proj |
| model.layers.17.self_attn.k_proj |
| model.layers.17.self_attn.o_proj |
| model.layers.17.self_attn.q_proj |
| model.layers.17.self_attn.v_proj |
| model.layers.17.mlp.down_proj |
| model.layers.17.mlp.gate_proj |
| model.layers.17.mlp.up_proj |
| model.layers.18.self_attn.k_proj |
| model.layers.18.self_attn.o_proj |
| model.layers.18.self_attn.q_proj |
| model.layers.18.self_attn.v_proj |
| model.layers.18.mlp.down_proj |
| model.layers.18.mlp.gate_proj |
| model.layers.18.mlp.up_proj |
| model.layers.19.self_attn.k_proj |
| model.layers.19.self_attn.o_proj |
| model.layers.19.self_attn.q_proj |
| model.layers.19.self_attn.v_proj |
| model.layers.19.mlp.down_proj |
| model.layers.19.mlp.gate_proj |
| model.layers.19.mlp.up_proj |
| model.layers.20.self_attn.k_proj |
| model.layers.20.self_attn.o_proj |
| model.layers.20.self_attn.q_proj |
| model.layers.20.self_attn.v_proj |
| model.layers.20.mlp.down_proj |
| model.layers.20.mlp.gate_proj |
| model.layers.20.mlp.up_proj |
| model.layers.21.self_attn.k_proj |
| model.layers.21.self_attn.o_proj |
| model.layers.21.self_attn.q_proj |
| model.layers.21.self_attn.v_proj |
| model.layers.21.mlp.down_proj |
| model.layers.21.mlp.gate_proj |
| model.layers.21.mlp.up_proj |
| model.layers.22.self_attn.k_proj |
| model.layers.22.self_attn.o_proj |
| model.layers.22.self_attn.q_proj |
| model.layers.22.self_attn.v_proj |
| model.layers.22.mlp.down_proj |
| model.layers.22.mlp.gate_proj |
| model.layers.22.mlp.up_proj |
| model.layers.23.self_attn.k_proj |
| model.layers.23.self_attn.o_proj |
| model.layers.23.self_attn.q_proj |
| model.layers.23.self_attn.v_proj |
| model.layers.23.mlp.down_proj |
| model.layers.23.mlp.gate_proj |
| model.layers.23.mlp.up_proj |
| model.layers.24.self_attn.k_proj |
| model.layers.24.self_attn.o_proj |
| model.layers.24.self_attn.q_proj |
| model.layers.24.self_attn.v_proj |
| model.layers.24.mlp.down_proj |
| model.layers.24.mlp.gate_proj |
| model.layers.24.mlp.up_proj |
| model.layers.25.self_attn.k_proj |
| model.layers.25.self_attn.o_proj |
| model.layers.25.self_attn.q_proj |
| model.layers.25.self_attn.v_proj |
| model.layers.25.mlp.down_proj |
| model.layers.25.mlp.gate_proj |
| model.layers.25.mlp.up_proj |
| model.layers.26.self_attn.k_proj |
| model.layers.26.self_attn.o_proj |
| model.layers.26.self_attn.q_proj |
| model.layers.26.self_attn.v_proj |
| model.layers.26.mlp.down_proj |
| model.layers.26.mlp.gate_proj |
| model.layers.26.mlp.up_proj |
| model.layers.27.self_attn.k_proj |
| model.layers.27.self_attn.o_proj |
| model.layers.27.self_attn.q_proj |
| model.layers.27.self_attn.v_proj |
| model.layers.27.mlp.down_proj |
| model.layers.27.mlp.gate_proj |
| model.layers.27.mlp.up_proj |
| model.layers.28.self_attn.k_proj |
| model.layers.28.self_attn.o_proj |
| model.layers.28.self_attn.q_proj |
| model.layers.28.self_attn.v_proj |
| model.layers.28.mlp.down_proj |
| model.layers.28.mlp.gate_proj |
| model.layers.28.mlp.up_proj |
| model.layers.29.self_attn.k_proj |
| model.layers.29.self_attn.o_proj |
| model.layers.29.self_attn.q_proj |
| model.layers.29.self_attn.v_proj |
| model.layers.29.mlp.down_proj |
| model.layers.29.mlp.gate_proj |
| model.layers.29.mlp.up_proj |
| model.layers.30.self_attn.k_proj |
| model.layers.30.self_attn.o_proj |
| model.layers.30.self_attn.q_proj |
| model.layers.30.self_attn.v_proj |
| model.layers.30.mlp.down_proj |
| model.layers.30.mlp.gate_proj |
| model.layers.30.mlp.up_proj |
| model.layers.31.self_attn.k_proj |
| model.layers.31.self_attn.o_proj |
| model.layers.31.self_attn.q_proj |
| model.layers.31.self_attn.v_proj |
| model.layers.31.mlp.down_proj |
| model.layers.31.mlp.gate_proj |
| model.layers.31.mlp.up_proj |
| model.layers.32.self_attn.k_proj |
| model.layers.32.self_attn.o_proj |
| model.layers.32.self_attn.q_proj |
| model.layers.32.self_attn.v_proj |
| model.layers.32.mlp.down_proj |
| model.layers.32.mlp.gate_proj |
| model.layers.32.mlp.up_proj |
| model.layers.33.self_attn.k_proj |
| model.layers.33.self_attn.o_proj |
| model.layers.33.self_attn.q_proj |
| model.layers.33.self_attn.v_proj |
| model.layers.33.mlp.down_proj |
| model.layers.33.mlp.gate_proj |
| model.layers.33.mlp.up_proj |
| model.layers.34.self_attn.k_proj |
| model.layers.34.self_attn.o_proj |
| model.layers.34.self_attn.q_proj |
| model.layers.34.self_attn.v_proj |
| model.layers.34.mlp.down_proj |
| model.layers.34.mlp.gate_proj |
| model.layers.34.mlp.up_proj |
| model.layers.35.self_attn.k_proj |
| model.layers.35.self_attn.o_proj |
| model.layers.35.self_attn.q_proj |
| model.layers.35.self_attn.v_proj |
| model.layers.35.mlp.down_proj |
| model.layers.35.mlp.gate_proj |
| model.layers.35.mlp.up_proj |
| model.layers.36.self_attn.k_proj |
| model.layers.36.self_attn.o_proj |
| model.layers.36.self_attn.q_proj |
| model.layers.36.self_attn.v_proj |
| model.layers.36.mlp.down_proj |
| model.layers.36.mlp.gate_proj |
| model.layers.36.mlp.up_proj |
| model.layers.37.self_attn.k_proj |
| model.layers.37.self_attn.o_proj |
| model.layers.37.self_attn.q_proj |
| model.layers.37.self_attn.v_proj |
| model.layers.37.mlp.down_proj |
| model.layers.37.mlp.gate_proj |
| model.layers.37.mlp.up_proj |
| model.layers.38.self_attn.k_proj |
| model.layers.38.self_attn.o_proj |
| model.layers.38.self_attn.q_proj |
| model.layers.38.self_attn.v_proj |
| model.layers.38.mlp.down_proj |
| model.layers.38.mlp.gate_proj |
| model.layers.38.mlp.up_proj |
| model.layers.39.self_attn.k_proj |
| model.layers.39.self_attn.o_proj |
| model.layers.39.self_attn.q_proj |
| model.layers.39.self_attn.v_proj |
| model.layers.39.mlp.down_proj |
| model.layers.39.mlp.gate_proj |
| model.layers.39.mlp.up_proj |
| model.layers.40.self_attn.k_proj |
| model.layers.40.self_attn.o_proj |
| model.layers.40.self_attn.q_proj |
| model.layers.40.self_attn.v_proj |
| model.layers.40.mlp.down_proj |
| model.layers.40.mlp.gate_proj |
| model.layers.40.mlp.up_proj |
| model.layers.41.self_attn.k_proj |
| model.layers.41.self_attn.o_proj |
| model.layers.41.self_attn.q_proj |
| model.layers.41.self_attn.v_proj |
| model.layers.41.mlp.down_proj |
| model.layers.41.mlp.gate_proj |
| model.layers.41.mlp.up_proj |
| model.layers.42.self_attn.k_proj |
| model.layers.42.self_attn.o_proj |
| model.layers.42.self_attn.q_proj |
| model.layers.42.self_attn.v_proj |
| model.layers.42.mlp.down_proj |
| model.layers.42.mlp.gate_proj |
| model.layers.42.mlp.up_proj |
| model.layers.43.self_attn.k_proj |
| model.layers.43.self_attn.o_proj |
| model.layers.43.self_attn.q_proj |
| model.layers.43.self_attn.v_proj |
| model.layers.43.mlp.down_proj |
| model.layers.43.mlp.gate_proj |
| model.layers.43.mlp.up_proj |
| model.layers.44.self_attn.k_proj |
| model.layers.44.self_attn.o_proj |
| model.layers.44.self_attn.q_proj |
| model.layers.44.self_attn.v_proj |
| model.layers.44.mlp.down_proj |
| model.layers.44.mlp.gate_proj |
| model.layers.44.mlp.up_proj |
| model.layers.45.self_attn.k_proj |
| model.layers.45.self_attn.o_proj |
| model.layers.45.self_attn.q_proj |
| model.layers.45.self_attn.v_proj |
| model.layers.45.mlp.down_proj |
| model.layers.45.mlp.gate_proj |
| model.layers.45.mlp.up_proj |
| model.layers.46.self_attn.k_proj |
| model.layers.46.self_attn.o_proj |
| model.layers.46.self_attn.q_proj |
| model.layers.46.self_attn.v_proj |
| model.layers.46.mlp.down_proj |
| model.layers.46.mlp.gate_proj |
| model.layers.46.mlp.up_proj |
| model.layers.47.self_attn.k_proj |
| model.layers.47.self_attn.o_proj |
| model.layers.47.self_attn.q_proj |
| model.layers.47.self_attn.v_proj |
| model.layers.47.mlp.down_proj |
| model.layers.47.mlp.gate_proj |
| model.layers.47.mlp.up_proj |
| model.layers.48.self_attn.k_proj |
| model.layers.48.self_attn.o_proj |
| model.layers.48.self_attn.q_proj |
| model.layers.48.self_attn.v_proj |
| model.layers.48.mlp.down_proj |
| model.layers.48.mlp.gate_proj |
| model.layers.48.mlp.up_proj |
| model.layers.49.self_attn.k_proj |
| model.layers.49.self_attn.o_proj |
| model.layers.49.self_attn.q_proj |
| model.layers.49.self_attn.v_proj |
| model.layers.49.mlp.down_proj |
| model.layers.49.mlp.gate_proj |
| model.layers.49.mlp.up_proj |
| model.layers.50.self_attn.k_proj |
| model.layers.50.self_attn.o_proj |
| model.layers.50.self_attn.q_proj |
| model.layers.50.self_attn.v_proj |
| model.layers.50.mlp.down_proj |
| model.layers.50.mlp.gate_proj |
| model.layers.50.mlp.up_proj |
| model.layers.51.self_attn.k_proj |
| model.layers.51.self_attn.o_proj |
| model.layers.51.self_attn.q_proj |
| model.layers.51.self_attn.v_proj |
| model.layers.51.mlp.down_proj |
| model.layers.51.mlp.gate_proj |
| model.layers.51.mlp.up_proj |
| model.layers.52.self_attn.k_proj |
| model.layers.52.self_attn.o_proj |
| model.layers.52.self_attn.q_proj |
| model.layers.52.self_attn.v_proj |
| model.layers.52.mlp.down_proj |
| model.layers.52.mlp.gate_proj |
| model.layers.52.mlp.up_proj |
| model.layers.53.self_attn.k_proj |
| model.layers.53.self_attn.o_proj |
| model.layers.53.self_attn.q_proj |
| model.layers.53.self_attn.v_proj |
| model.layers.53.mlp.down_proj |
| model.layers.53.mlp.gate_proj |
| model.layers.53.mlp.up_proj |
| model.layers.54.self_attn.k_proj |
| model.layers.54.self_attn.o_proj |
| model.layers.54.self_attn.q_proj |
| model.layers.54.self_attn.v_proj |
| model.layers.54.mlp.down_proj |
| model.layers.54.mlp.gate_proj |
| model.layers.54.mlp.up_proj |
| model.layers.55.self_attn.k_proj |
| model.layers.55.self_attn.o_proj |
| model.layers.55.self_attn.q_proj |
| model.layers.55.self_attn.v_proj |
| model.layers.55.mlp.down_proj |
| model.layers.55.mlp.gate_proj |
| model.layers.55.mlp.up_proj |
| model.layers.56.self_attn.k_proj |
| model.layers.56.self_attn.o_proj |
| model.layers.56.self_attn.q_proj |
| model.layers.56.self_attn.v_proj |
| model.layers.56.mlp.down_proj |
| model.layers.56.mlp.gate_proj |
| model.layers.56.mlp.up_proj |
| model.layers.57.self_attn.k_proj |
| model.layers.57.self_attn.o_proj |
| model.layers.57.self_attn.q_proj |
| model.layers.57.self_attn.v_proj |
| model.layers.57.mlp.down_proj |
| model.layers.57.mlp.gate_proj |
| model.layers.57.mlp.up_proj |
| model.layers.58.self_attn.k_proj |
| model.layers.58.self_attn.o_proj |
| model.layers.58.self_attn.q_proj |
| model.layers.58.self_attn.v_proj |
| model.layers.58.mlp.down_proj |
| model.layers.58.mlp.gate_proj |
| model.layers.58.mlp.up_proj |
| model.layers.59.self_attn.k_proj |
| model.layers.59.self_attn.o_proj |
| model.layers.59.self_attn.q_proj |
| model.layers.59.self_attn.v_proj |
| model.layers.59.mlp.down_proj |
| model.layers.59.mlp.gate_proj |
| model.layers.59.mlp.up_proj |
| Done. |
|
|