diff --git "a/measurement.json" "b/measurement.json" --- "a/measurement.json" +++ "b/measurement.json" @@ -1,65918 +1,42929 @@ { - "measurement": [ - { - "key": "model.layers.0.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.052291933447122574, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.05180061236023903, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.02581789158284664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.025684988126158714, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.02561614289879799, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.009172504767775536, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.05236976593732834, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.051387425512075424, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.025650247931480408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.025558194145560265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.02568555437028408, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.02611672878265381, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.025532061234116554, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.013763204216957092, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.009121925570070744, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.013893723487854004, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.009103807620704174, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.007127670105546713, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.009100251831114292, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.007122076582163572, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.009146258234977722, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.009099281392991543, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.007116096559911966, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.007120033260434866, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.030944211408495903, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.030687818303704262, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.014080392196774483, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.014017038978636265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.013983924873173237, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.005913974717259407, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.032488852739334106, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.03046833537518978, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.01399689819663763, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.013956055976450443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.014016089029610157, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.014546887017786503, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.013945193029940128, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.008076507598161697, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.005895753391087055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.008138325996696949, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.005890493746846914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.005092145875096321, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.005890173837542534, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.0050903805531561375, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.005909585859626532, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.005890021100640297, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.005089023150503635, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.005090093240141869, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.1079910546541214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.0999349057674408, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.058909252285957336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.05506906658411026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.05266030132770538, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.022196661680936813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.10385514050722122, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.09488368779420853, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.05431171506643295, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.04987079277634621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.05699087679386139, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.060279425233602524, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.04867777228355408, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.025308992713689804, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.015644537284970284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.030568616464734077, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.012536760419607162, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.007153891958296299, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.011941342614591122, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.0056835962459445, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.01569254882633686, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.011683344841003418, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.005466498900204897, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.004677966237068176, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.0031233741901814938, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.0026621378492563963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.002171872416511178, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.0019019482424482703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.0012122183106839657, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.0009346073493361473, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.002495122142136097, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.0018719830550253391, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.0012975740246474743, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.0011172732338309288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.0010906093521043658, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.0010286769829690456, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.0009271390736103058, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.000663643004372716, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.0005865641869604588, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.0005415863124653697, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.00048359614447690547, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.000448303617304191, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.00047444491065107286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.0004334172117523849, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.00034938365570269525, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.0004576722567435354, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.00027021951973438263, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.00040493562119081616, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.16957035660743713, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.15772341191768646, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.15309622883796692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1366555392742157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.07739554345607758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.07391770929098129, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.08826582133769989, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.08184850960969925, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.0790339782834053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.07018855959177017, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.06396404653787613, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.04437677562236786, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.03897319734096527, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.03699536621570587, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.036498699337244034, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02219485118985176, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.01908690854907036, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.018888870254158974, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.017655784264206886, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.016899380832910538, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.011687785387039185, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01179194264113903, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.011023635976016521, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.007767403032630682, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.20008538663387299, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.18670207262039185, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.1819426417350769, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1628841906785965, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09187270700931549, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.08815769851207733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10346128791570663, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09602950513362885, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09366291016340256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08342823386192322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07616423815488815, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05231811851263046, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.045680735260248184, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.043805453926324844, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04336704686284065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02606779709458351, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.022373953834176064, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.022218799218535423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02067405730485916, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.01988166943192482, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.013499470427632332, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.013416679576039314, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.012851244769990444, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.00851262453943491, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.0.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.06050647795200348, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.045778077095746994, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.03939078003168106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.03389550745487213, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.025750841945409775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.019433658570051193, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.03330892324447632, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.030534951016306877, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.02773662842810154, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.0189218707382679, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.018150335177779198, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.01683591865003109, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.014769107103347778, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.012685174122452736, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.012149952352046967, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.008525945246219635, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.007175568025559187, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.007001944351941347, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.0060197170823812485, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.005715942941606045, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.004792241845279932, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.005406113341450691, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.004170358180999756, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.0043011619709432125, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.037124525755643845, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.03581106662750244, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.018637804314494133, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.017668157815933228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.01654578372836113, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.008828684687614441, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.03910205885767937, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.03415721654891968, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.01679990254342556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.01614486612379551, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.016392145305871964, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.016829922795295715, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.015760309994220734, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.009663495235145092, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.007715476211160421, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.009414870291948318, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.007369752507656813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.006885427515953779, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.007326458115130663, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.006822196301072836, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.00678561395034194, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007286595646291971, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.006087517365813255, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.006760246120393276, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.032911431044340134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.03230016306042671, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.013914729468524456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.013534150086343288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.013068655505776405, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.006453466136008501, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.041759662330150604, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.03142638877034187, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.013156902976334095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.012909016571938992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.013037530705332756, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.015585266053676605, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.012753867544233799, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.007581732235848904, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.006023495923727751, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.0074391490779817104, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.005898887757211924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.00540822884067893, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.005882877390831709, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.0053858682513237, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.005716139916330576, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.005869840271770954, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.005190563853830099, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0053633227944374084, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.14662803709506989, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.1279224455356598, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.11731971055269241, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.09969724714756012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.06625143438577652, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.05797503516077995, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.0938018411397934, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.07848165929317474, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.06989801675081253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.056696854531764984, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.05296376347541809, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.04941624030470848, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.03891771659255028, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.03292332962155342, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.030496304854750633, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.026823323220014572, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.017243146896362305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.015522593632340431, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.015405681915581226, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.012728034518659115, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.014853866770863533, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.012295332737267017, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.009563899599015713, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.006572611629962921, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.14363518357276917, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.1006222814321518, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.07484669983386993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.06510074436664581, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.056290630251169205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.035910967737436295, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.08946192264556885, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.08181318640708923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.06705870479345322, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.04509090632200241, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04355236887931824, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.04542400315403938, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.039340559393167496, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.028325678780674934, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.025083908811211586, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.02304903231561184, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.016707919538021088, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.01570335403084755, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.014909377321600914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.012811603024601936, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.012945704162120819, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.014064265415072441, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.0096386494114995, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.011042609810829163, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.16008764505386353, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1495831161737442, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.14607729017734528, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.13135893642902374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.0725451409816742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.06986071169376373, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.08158606290817261, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.07549584656953812, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.07391628623008728, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.06628118455410004, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.06117544323205948, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.04132668673992157, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.03590002283453941, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.03458734601736069, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.03428453579545021, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.020618587732315063, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.017858516424894333, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.017762063071131706, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.016602789983153343, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.016054706647992134, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.010918354615569115, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.010944118723273277, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.010494517162442207, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.007349228486418724, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.1957293599843979, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.18378986418247223, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.1799021065235138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.16238214075565338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.08952812105417252, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.0864410474896431, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10020822286605835, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09273364394903183, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.0911199301481247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08212697505950928, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07591396570205688, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.050743237137794495, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04407177120447159, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.0426463820040226, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.042304351925849915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.025289149954915047, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.021690359339118004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.021600032225251198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02018498256802559, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.019527263939380646, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.013133825734257698, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.012822617776691914, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.012662775814533234, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.008029390126466751, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.1.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.1707036793231964, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.15646344423294067, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.1509632170200348, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.13571825623512268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.07817486673593521, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.07257773727178574, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.08961686491966248, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.08277830481529236, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.07967153191566467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.06885861605405807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.0658809170126915, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.04568619281053543, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.03971594199538231, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.03765658289194107, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.03714071586728096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.02291998639702797, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.019953064620494843, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.019750699400901794, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.018169421702623367, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.017995338886976242, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.012453882023692131, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.013052660040557384, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.011803124099969864, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.00954122468829155, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.04149484634399414, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.03809940069913864, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.027193065732717514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.02412271872162819, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.01883154734969139, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.012845519930124283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.033992234617471695, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.031594544649124146, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.01946191117167473, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.017285039648413658, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.017163729295134544, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.016642693430185318, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.015057302080094814, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.009602747857570648, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.007878120057284832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.008457714691758156, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.005871900822967291, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.005075845401734114, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.00562724843621254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.004698228556662798, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.004992329515516758, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.005292040295898914, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.003659941954538226, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.004124726168811321, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.026748064905405045, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.02553068846464157, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.014378341846168041, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.013239945285022259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.011497309431433678, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.0067384555004537106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.027210727334022522, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.02342303842306137, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.011730147525668144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.011016817763447762, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.011166601441800594, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.012134240940213203, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.010383576154708862, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.006380401086062193, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.005124881863594055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.005980214569717646, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.0045901876874268055, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.004227264318615198, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.0045300619676709175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.004140910692512989, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.004244142211973667, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.004456085618585348, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0037930530961602926, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.004026591777801514, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.1776852011680603, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.15867750346660614, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.14887766540050507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.12875381112098694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.08178315311670303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.07426200807094574, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.10861406475305557, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.0930212140083313, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.08489270508289337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.07091733068227768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.06562458723783493, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05678337812423706, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.0452289804816246, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.039746593683958054, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.03799326717853546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.02929680049419403, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02029741182923317, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.019353846088051796, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.018116015940904617, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.01620212197303772, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.01565810665488243, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.01326664723455906, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.01144985668361187, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.007445401046425104, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.17829671502113342, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.13980349898338318, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.11819276958703995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.093250572681427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.0778292715549469, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.0606229268014431, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.10868401825428009, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.09897098690271378, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.08452847599983215, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.05881441384553909, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.05598664656281471, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05569577217102051, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.0482260100543499, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.03879465162754059, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.03623592481017113, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.028458157554268837, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.022365963086485863, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.021424073725938797, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.01921255700290203, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.01718822494149208, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.016286909580230713, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.017641786485910416, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.013550245203077793, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.014065369963645935, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.20116327702999115, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1889110654592514, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.18497300148010254, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.16666004061698914, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09247571229934692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.08924108743667603, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10357695817947388, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.0957494005560875, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.0940994992852211, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08469220995903015, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07816007733345032, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05259072035551071, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.0456685908138752, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04418307542800903, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04383992776274681, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.026241164654493332, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.02270766720175743, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.022601637989282608, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.0211339071393013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.020457519218325615, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.013811670243740082, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.013752011582255363, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013359622098505497, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009068381041288376, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2195136398077011, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2065044641494751, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.20223259925842285, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.18258275091648102, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.10126534849405289, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09780342131853104, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.11330129206180573, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10478270798921585, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.10309286415576935, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0928243100643158, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08572933077812195, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.057442814111709595, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04990394413471222, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04833691567182541, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04796026647090912, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.028686169534921646, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.024685924872756004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.0245658066123724, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.022956322878599167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.022205665707588196, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014998165890574455, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01467511523514986, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.014489056542515755, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009342143312096596, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.2.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.22632376849651337, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.20988136529922485, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.20322078466415405, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.1832873523235321, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.10498913377523422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.09822049736976624, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.12023387849330902, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.11073356866836548, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.10666035860776901, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09313545376062393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.08912400156259537, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.06110132485628128, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.05303585156798363, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05047333240509033, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.049870483577251434, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.030630696564912796, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.026440003886818886, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.02619139850139618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.024101344868540764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.023925339803099632, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.01651516929268837, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.016841299831867218, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.015708105638623238, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.011899827048182487, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.06750202924013138, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.06052366644144058, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.05426434054970741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.047384463250637054, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.030872056260704994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.026885155588388443, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.0419774055480957, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.03873877599835396, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.03211524337530136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.027057619765400887, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.025130635127425194, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.021257828921079636, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.018560880795121193, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.015028600580990314, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.014098347164690495, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.010696274228394032, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.008077203296124935, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.0076490058563649654, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.007351399399340153, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.006615730002522469, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.005795732140541077, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.005841245874762535, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.004735962022095919, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.004108896944671869, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.041945621371269226, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.037618089467287064, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.03072923608124256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.02686200477182865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.018844854086637497, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.015142050571739674, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.030203649774193764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.027207184582948685, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.019644679501652718, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.01663532294332981, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.01577812433242798, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.014428389258682728, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.012846751138567924, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.009500566869974136, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.008492281660437584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.007485235575586557, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.00551835261285305, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.004987175110727549, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.0051433127373456955, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.004441367462277412, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.0045217489823699, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.004556672647595406, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0036470615305006504, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.003442938206717372, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.19431674480438232, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.17733798921108246, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.17033562064170837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.14970313012599945, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.09001379460096359, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.08425522595643997, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.10771045833826065, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.09748036414384842, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.09286566823720932, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.07968854904174805, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.072355255484581, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05502503365278244, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.046756990253925323, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.04332320764660835, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04235811159014702, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.02759157493710518, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.021939728409051895, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.02153830975294113, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.019744468852877617, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.018517812713980675, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.014196627773344517, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.013214776292443275, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.012480269186198711, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.007592487148940563, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.15020354092121124, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.12421592324972153, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.11384402960538864, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.09301099926233292, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.06426145136356354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.05652949586510658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.08447486162185669, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.07639852166175842, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.07040274143218994, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.053222235292196274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04880140721797943, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.043165165930986404, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.03719432279467583, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.031701162457466125, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.030270185321569443, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.02202727645635605, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.01785166934132576, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.017484338954091072, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.01591642200946808, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.014664570800960064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.012572018429636955, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.013422134332358837, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.010969378985464573, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.010601995512843132, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2284248024225235, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2144991010427475, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.20989319682121277, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.18924807012081146, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.10607375204563141, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10225116461515427, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.11889250576496124, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10988693684339523, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.10805615782737732, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.09691603481769562, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08917166292667389, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.060394369065761566, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.052457842975854874, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.050760168582201004, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.050365738570690155, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.030181895941495895, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.02608712762594223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.025968264788389206, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.024226276203989983, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.023411551490426064, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.015882348641753197, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.015756940469145775, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015343401581048965, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010348601266741753, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.24348098039627075, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2287055104970932, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.22390908002853394, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.20201382040977478, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11324422061443329, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10924870520830154, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12676787376403809, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11721473187208176, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11535989493131638, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10345711559057236, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09520711749792099, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06433085352182388, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.055888935923576355, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.054100241512060165, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05366349592804909, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.0320952907204628, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027509871870279312, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.027398202568292618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02549402043223381, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.024630311876535416, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.01667862944304943, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.016211193054914474, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016078712418675423, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010071336291730404, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.3.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.09508547931909561, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.08958464860916138, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.06592251360416412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.06124173849821091, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.04689856618642807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.030081136152148247, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.06785013526678085, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.07485980540513992, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.047509752213954926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.04414854571223259, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.0439470149576664, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.04122862219810486, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.03908888250589371, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.02872363105416298, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.019312337040901184, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.02716570533812046, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.015090730972588062, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.008441930636763573, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.014673218131065369, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.00767515180632472, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.014417712576687336, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.013916991651058197, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.005898985546082258, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.005155712831765413, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.07523850351572037, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.06952866911888123, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.06337545812129974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.05630020797252655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.03460569679737091, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.030919170007109642, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.04586625471711159, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.04276902973651886, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.035500988364219666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.031219936907291412, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.02899676188826561, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.02323724515736103, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.020261600613594055, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.016669128090143204, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.015738319605588913, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.01166993286460638, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.0087022315710783, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.008210449479520321, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.008022216148674488, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.007239452563226223, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.006190714426338673, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.005967143923044205, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.005045920144766569, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.0037990324199199677, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.062111686915159225, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.05643482133746147, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.052837394177913666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.04648018255829811, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.028155934065580368, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.02575705200433731, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.03509657829999924, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.032514333724975586, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.02903253771364689, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.02495579980313778, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.022797560319304466, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.017618117853999138, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.015370012260973454, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.01350715383887291, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.013031046837568283, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.008750079199671745, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.006924652028828859, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.00670106615871191, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.00625756336376071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.0057803490199148655, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.004547615069895983, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.0044322567991912365, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.003934780135750771, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.002706074621528387, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.20915861427783966, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.19307254254817963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.18661370873451233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.16574090719223022, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.09664040058851242, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.09173011034727097, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.11243954300880432, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.10307908058166504, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.09909282624721527, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.0867237001657486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.07928111404180527, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05730857700109482, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.04922378063201904, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.046125344932079315, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04537394642829895, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.028579657897353172, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02335955761373043, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.023074578493833542, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.021280253306031227, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.02020930126309395, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.01458539068698883, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.013844029046595097, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.013350239023566246, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.007957910187542439, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.20946843922138214, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.16572614014148712, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.14996030926704407, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.1308881938457489, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.09134463965892792, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.07643533498048782, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.11394781619310379, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10536210983991623, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.10011587291955948, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.0736493170261383, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.0671987235546112, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05931726098060608, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.051468394696712494, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04513535648584366, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.04354066401720047, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.03049270436167717, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.025353724136948586, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.025018662214279175, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.02219446562230587, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.02071346901357174, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.01741728186607361, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.018743742257356644, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.015486118383705616, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.014866792596876621, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.23813189566135406, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.22344247996807098, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.21860817074775696, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.19703103601932526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11095250397920609, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10691363364458084, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12425971031188965, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11494378000497818, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11298102885484695, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10127235949039459, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09302138537168503, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06321028620004654, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.0549309179186821, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05308330059051514, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05264490842819214, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03158082067966461, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027203375473618507, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.027075769379734993, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02522306889295578, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.024367645382881165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.0165382269769907, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01633160002529621, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015933463349938393, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.01056537963449955, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.25414833426475525, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2387564331293106, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.2337283492088318, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.2105938047170639, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11850133538246155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.11424538493156433, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.1326080858707428, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.12271972000598907, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.1206800565123558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10818898677825928, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09941955655813217, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06742385029792786, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05855894088745117, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.056643784046173096, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05616461858153343, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03366408869624138, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.02882140688598156, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.028687993064522743, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.026701636612415314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.025774750858545303, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.01748056709766388, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.017009977251291275, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016840951517224312, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010601846501231194, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.4.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.22677956521511078, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.20376160740852356, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.19483405351638794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.1742590069770813, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.10470637679100037, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.09525527060031891, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.12218808382749557, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.11244605481624603, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.10759449005126953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09026801586151123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.08594037592411041, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.0621471144258976, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.05402113124728203, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.050528381019830704, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.04969023913145065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.0311899334192276, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.02665659412741661, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.02635314129292965, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.023832641541957855, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.023504218086600304, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.016884084790945053, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.017372146248817444, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01579555869102478, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.01240752823650837, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.07141759246587753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.06613820046186447, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.06101033464074135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.05429625138640404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.03321903944015503, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.02990960329771042, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.04334200173616409, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.03994650021195412, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.03404781594872475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.03000490739941597, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.02788688987493515, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.021924041211605072, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.019215982407331467, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.016002630814909935, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.015189127996563911, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.01100062858313322, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.008363770321011543, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.007963758893311024, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.007717495784163475, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.007046622224152088, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.005859783384948969, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.005693401675671339, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.004914121702313423, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.003723322181031108, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.057523343712091446, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.05255451425909996, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.049217965453863144, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.043303027749061584, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.026317663490772247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.024034494534134865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.03314166143536568, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.030573105439543724, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.027149982750415802, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.023483989760279655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.021542225033044815, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.016658833250403404, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.014602280221879482, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.012663994915783405, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.012163079343736172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.0083093773573637, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.006524809170514345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.006288773380219936, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.0059333257377147675, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.005467843730002642, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.004377619829028845, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.0042204177007079124, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.003761697793379426, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.002624987857416272, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2246181070804596, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.20728051662445068, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.20024777948856354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.1776912957429886, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.1042361855506897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.09849043935537338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.1218767985701561, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.11144556850194931, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10681717097759247, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.09322589635848999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.0856095552444458, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.0626448392868042, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.053413379937410355, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.04985769838094711, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04896377772092819, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.03125659376382828, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.0251738503575325, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.02482321672141552, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.022853519767522812, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.021607104688882828, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.015921087935566902, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.014896020293235779, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.014389194548130035, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.008486750535666943, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.20642368495464325, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.16903437674045563, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.15404462814331055, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.12444361299276352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.0908680409193039, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.07960290461778641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.11580514162778854, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10573221743106842, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09806300699710846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.07046789675951004, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.06569002568721771, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05929147079586983, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.05142153054475784, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.044756997376680374, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.043072231113910675, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.030009640380740166, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.024916736409068108, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.024428661912679672, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.021314101293683052, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.019766876474022865, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.016933584585785866, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.0183287151157856, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.014982807449996471, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.014342288486659527, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2426733374595642, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.22773465514183044, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.22283929586410522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.20059773325920105, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11335550993680954, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10941295325756073, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12712429463863373, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11753369867801666, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11558496206998825, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10344721376895905, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.0950608178973198, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06483884155750275, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.056273527443408966, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05440318584442139, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.053962089121341705, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.032453060150146484, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.02816425822675228, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.028045430779457092, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.026162439957261086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.025291210040450096, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.017204493284225464, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01731610856950283, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016591746360063553, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.011755081824958324, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2637983560562134, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.24760982394218445, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.2425052970647812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.2184246927499771, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.12322451174259186, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.11886321753263474, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.13801756501197815, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.12758004665374756, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.12549929320812225, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.11247245222330093, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.10325728356838226, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.07012390345335007, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.06088097020983696, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05890774354338646, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05842602998018265, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.035017453134059906, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.029973387718200684, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.029840586706995964, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.027743767946958542, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.026781607419252396, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.018143806606531143, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.017654025927186012, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.017474504187703133, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010948625393211842, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.5.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.23646031320095062, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.213955819606781, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.2053072452545166, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.1843481957912445, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.10930653661489487, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.09995904564857483, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.12645761668682098, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.11676555126905441, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.11206982284784317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09487729519605637, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09068988263607025, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.06453604251146317, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.056207325309515, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05269940197467804, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.051872383803129196, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03252037987112999, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.02775595709681511, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.027420440688729286, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.02493145875632763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.024585476145148277, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.017596453428268433, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.017978738993406296, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.016483917832374573, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.012757441028952599, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.0707504153251648, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.06482192128896713, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.060195982456207275, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.05334055796265602, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.03277721628546715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.029666995629668236, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.041971851140260696, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.0386744849383831, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.033713199198246, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.02930072881281376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.027102502062916756, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.02136627398431301, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.018659867346286774, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.015873564407229424, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.015154621563851833, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.010812688618898392, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.008403616957366467, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.008061161264777184, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.007721506059169769, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.0071039507165551186, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.005847516935318708, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.005787878297269344, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.004979821853339672, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.003968972247093916, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.06502456218004227, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.05836605653166771, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.054566577076911926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.04775873199105263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.029467638581991196, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.026738081127405167, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.03685346990823746, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.034150492399930954, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.03050423227250576, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.02596048265695572, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.023676693439483643, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.018553754314780235, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.016395941376686096, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.014163062907755375, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.013598752208054066, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.009292250499129295, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.007284657564014196, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.007014983333647251, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.006565834861248732, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.006018944084644318, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.004834007006138563, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.004741380922496319, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0041180821135640144, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0029224592726677656, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.20745648443698883, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.19148553907871246, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.18471306562423706, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.16435201466083527, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.09576380252838135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.09089380502700806, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.11287317425012589, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.10285335034132004, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.09841288626194, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.08608216047286987, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.07896454632282257, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05748699977993965, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.049088627099990845, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.045957863330841064, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.045250408351421356, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.028691556304693222, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02322896011173725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.022924069315195084, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02115500532090664, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.020083602517843246, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.014634138904511929, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.013750890269875526, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.013306026346981525, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.007926496677100658, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.1766517460346222, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.15084294974803925, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.1366235911846161, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.1137942522764206, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.07952874898910522, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.07006972283124924, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.10712838172912598, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.09566783905029297, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.08426501601934433, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.06567420810461044, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.060237519443035126, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05496438592672348, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.04670612886548042, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.03932724520564079, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.0374404639005661, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.0278523750603199, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.022008242085576057, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.021309152245521545, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.019532982259988785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.017934981733560562, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.015726033598184586, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.016513021662831306, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.01342580746859312, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.01287003792822361, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.23122207820415497, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2172355204820633, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.21252821385860443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.19140678644180298, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.10817069560289383, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10418495535850525, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.121353879570961, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11223197728395462, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.1101284995675087, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0986490547657013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09072840958833694, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06183381378650665, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05370187386870384, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05184582248330116, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05137842893600464, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.030894573777914047, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.026712344959378242, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02658676914870739, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.024806056171655655, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.023957349359989166, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.016217578202486038, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01629963144659996, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015582006424665451, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010847135446965694, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.25514838099479675, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.23969322443008423, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.23457472026348114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.21151304244995117, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11931431293487549, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.11495883017778397, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.13381555676460266, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.12370451539754868, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.12150838971138, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10896125435829163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.10020306706428528, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06812775880098343, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.059095557779073715, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05709623172879219, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.0566091351211071, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03402990475296974, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.029108021408319473, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02897460013628006, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02697221376001835, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.026030637323856354, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.017704930156469345, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.017289498820900917, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.017017962411046028, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010882462374866009, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.6.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.23563355207443237, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.21104440093040466, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.20183861255645752, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.1803532987833023, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.10883864760398865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.09886085242033005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.1272946000099182, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.11667470633983612, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.1119488850235939, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09354916960000992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.08932525664567947, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.06507513672113419, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.056336164474487305, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05270175263285637, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.05184097960591316, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.032826609909534454, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.028096554800868034, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.02778785116970539, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.025166910141706467, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.024811016395688057, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.017929282039403915, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.018661024048924446, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.016764376312494278, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.013731464743614197, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.09093768149614334, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.08460533618927002, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.07999762147665024, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.07157812267541885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.04225018247961998, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.03927991911768913, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.05216361954808235, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.04809378832578659, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.04318834841251373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.03827251121401787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.03537605702877045, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.026376057416200638, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.022964676842093468, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.020402267575263977, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.019735870882868767, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.013284506276249886, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.010657178238034248, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.010349959135055542, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.009869992733001709, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.009242020547389984, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.007119559682905674, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007031756918877363, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.0063150483183562756, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.0047395131550729275, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.07043804973363876, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.06517510116100311, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.0617995522916317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.05485263094305992, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.0324290469288826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.030142666772007942, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.03935466706752777, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.03642076626420021, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.033250488340854645, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.029169293120503426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.026867683976888657, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.01986188068985939, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.017392821609973907, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.015565115958452225, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.015120264142751694, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.009928585030138493, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.008021331392228603, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.007804917637258768, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.007355823647230864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.00688757561147213, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.005175134167075157, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.005087332800030708, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.004616828169673681, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0032196366228163242, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2246702015399933, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.2086804062128067, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.20342053472995758, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.18224236369132996, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.10433656722307205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.09979206323623657, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.12009227275848389, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.10960620641708374, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10672285407781601, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.09454943239688873, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.08721081167459488, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.06124436855316162, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.0524815171957016, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.049996424466371536, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.049374036490917206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.030617736279964447, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.025229956954717636, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.025008955970406532, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.0231743436306715, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.02216663397848606, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.01564379408955574, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.014757663011550903, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.014512808062136173, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.00866807159036398, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.14207984507083893, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.1261696070432663, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.12050846219062805, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.0993603989481926, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.06489403545856476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.060414329171180725, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.07759569585323334, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.0693645104765892, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.06711659580469131, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.05419520288705826, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04698910564184189, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.03933153674006462, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.033659085631370544, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.031698767095804214, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.03121890313923359, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.019882889464497566, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.01716626062989235, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.01703517511487007, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.015164828859269619, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.014394626952707767, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.011149131692945957, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.011736121028661728, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.010507696308195591, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.009005393832921982, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2001146376132965, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.18797415494918823, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.18338708579540253, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.16539016366004944, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09395454078912735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09021024405956268, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10667148232460022, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09826867282390594, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09571286290884018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0858292430639267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07944905757904053, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.054999880492687225, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04759516939520836, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.045466117560863495, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.044954970479011536, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02760130725800991, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.024147238582372665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.023952851071953773, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.022594420239329338, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02180437743663788, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014986704103648663, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01581738516688347, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.014215544797480106, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.011649949476122856, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2442762553691864, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.22975720465183258, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.22428229451179504, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.20247392356395721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11441945284605026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10990650206804276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12997373938560486, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11985069513320923, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11655779927968979, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10460096597671509, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09665504842996597, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06640122085809708, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05738067254424095, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05474979057908058, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05411969870328903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.033191099762916565, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027923915535211563, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.027674412354826927, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.025904497131705284, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02488003484904766, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.017242033034563065, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01670912094414234, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016246387735009193, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010412296280264854, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.7.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.08673620969057083, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.07768762111663818, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.06532007455825806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.058745402842760086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.0397656112909317, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.031778544187545776, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.06506391614675522, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.05505342036485672, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.04088500142097473, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.03484419360756874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.033966485410928726, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.03012971207499504, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.02620578184723854, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.02000642567873001, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.018096208572387695, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.01504978071898222, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.011533516459167004, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.010440537706017494, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.010705665685236454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.009513922035694122, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.009103319607675076, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.009354900568723679, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.007311967201530933, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.007002376951277256, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.07402639091014862, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.06654690951108932, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.06273672729730606, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.05531570687890053, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.033895667642354965, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.031178612262010574, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.042270008474588394, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.03826798126101494, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.03528081998229027, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.02986474707722664, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.027727944776415825, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.02151327021420002, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.01830693520605564, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.016345493495464325, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.015850113704800606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.01075607817620039, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.008479362353682518, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.008285499177873135, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.007672375068068504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.007160938810557127, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.005605693440884352, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.005500145722180605, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.004928018897771835, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.0035985263530164957, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.06992579996585846, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.061609651893377304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.057005539536476135, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.04964418336749077, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.031207041814923286, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.02790248766541481, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.039722990244627, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.03675847128033638, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.032713621854782104, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.027226734906435013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.025078769773244858, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.02016463875770569, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.017618969082832336, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.015040998347103596, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.014368603006005287, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.010046676732599735, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.007747968193143606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.007467108778655529, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.006941621191799641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.006311421748250723, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.005167189985513687, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.005103099159896374, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.004332691431045532, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0031780421268194914, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.19935667514801025, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.18357495963573456, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.17754708230495453, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.15748314559459686, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.09232041984796524, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.08762634545564651, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.10823192447423935, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.09824758023023605, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.0950697660446167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.08255156129598618, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.0755973607301712, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05510301515460014, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.04713885113596916, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.044299982488155365, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04364771395921707, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.027498889714479446, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02241467498242855, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.022179413586854935, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.020332664251327515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.019325723871588707, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.013990506529808044, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.013269852846860886, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.012848924845457077, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.00778276938945055, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.18976803123950958, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.1618354171514511, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.1471036672592163, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.1267784684896469, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.08611936122179031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.07453924417495728, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.1115816980600357, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10208319127559662, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09093744307756424, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.07180438935756683, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.06550781428813934, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05733107030391693, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.0494609996676445, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04216213524341583, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.040311697870492935, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.028909040614962578, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.023030327633023262, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.02227531559765339, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.02036987990140915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.018691450357437134, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.01602800004184246, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.016626279801130295, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.013799671083688736, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.012372578494250774, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.22244597971439362, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.20913252234458923, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.2048674374818802, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.18505197763442993, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.1040966808795929, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10043437778949738, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.1164289191365242, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.1076064258813858, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.10601144284009933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.09506500512361526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08762156963348389, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.059316862374544144, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05147130787372589, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.0498785637319088, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.049497298896312714, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02962653525173664, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.025622185319662094, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.025517653673887253, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.023800773546099663, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.023028574883937836, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.01556491106748581, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.015468420460820198, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.01503404974937439, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.01014193519949913, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2546956241130829, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2396080195903778, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.23487621545791626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.2122131735086441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11912690848112106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.11506360024213791, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.13311851024627686, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.12313342839479446, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.12130426615476608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10895370692014694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.10034545511007309, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.0676470547914505, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05878672003746033, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05699581280350685, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05654554069042206, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.033784814178943634, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.028977345675230026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02885539084672928, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.026857048273086548, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.025960544124245644, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.01746917888522148, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.017027566209435463, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016853950917720795, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010541975498199463, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.8.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.24186034500598907, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.21677301824092865, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.20598897337913513, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.1845676600933075, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.11205446720123291, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10105747729539871, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.13399267196655273, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.12234041839838028, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.11532453447580338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09653956443071365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09247534722089767, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.06859834492206573, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.05918794125318527, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.054408345371484756, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.053220562636852264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.0350397452712059, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.02914203517138958, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.02869100123643875, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.02618696540594101, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.025674104690551758, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.019700486212968826, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.01970691978931427, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.018165243789553642, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014600437134504318, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.08213141560554504, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.07644933462142944, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.07367347925901413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.06609717011451721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.03833199292421341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.03630607947707176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.04605593904852867, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.04140085354447365, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.039198629558086395, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.034705642610788345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.03256271034479141, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.023436609655618668, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.01979808136820793, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.018429305404424667, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.018095871433615685, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.011734637431800365, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.009471927769482136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.009335544891655445, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.008739179000258446, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.008320999331772327, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.006090282928198576, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.00585215026512742, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.005618198774755001, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.003740665502846241, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.07431629300117493, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.06863204389810562, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.06560017168521881, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.05833366885781288, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.03404930606484413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.03195510059595108, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.0405883826315403, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.03760290518403053, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.034882932901382446, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.03067922592163086, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.02817467413842678, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.020518414676189423, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.017892250791192055, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.016328485682606697, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.015940431505441666, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.010233065113425255, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.008318113163113594, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.008146390318870544, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.0076088798232376575, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.007162050344049931, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.005282239988446236, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.005135520827025175, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.004745613317936659, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.003121953224763274, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2130395621061325, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.19839619100093842, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.19290275871753693, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.17230261862277985, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.09865322709083557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.09438026696443558, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.11391303688287735, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.10377039760351181, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10100475698709488, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.08943596482276917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.08212078362703323, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05804293230175972, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.04964454099535942, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.04724867269396782, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04667108505964279, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.029050419107079506, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.023813121020793915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.02359990030527115, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02182227186858654, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.020905165001749992, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.014755873940885067, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.013876236043870449, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.01366398110985756, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.007984216324985027, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.21448098123073578, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.18923628330230713, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.1802026480436325, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.15492567420005798, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.09777949005365372, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.09015995264053345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.11471670866012573, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10590800642967224, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.10144062340259552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.08291153609752655, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.073975570499897, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05855347961187363, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.050657451152801514, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04697445034980774, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.04607399180531502, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.029212500900030136, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.024278849363327026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.023979077115654945, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.021268034353852272, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.0200228039175272, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.015326878055930138, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.015245846472680569, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.01414245180785656, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.010020687244832516, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.21079137921333313, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.19795635342597961, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.193790465593338, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1748177856206894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09864384680986404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09512583911418915, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.11031506210565567, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10220615565776825, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.10049809515476227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0899977758526802, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08271970599889755, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.056084707379341125, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04883875697851181, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.047220222651958466, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04683845862746239, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02802654542028904, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.024205276742577553, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.024099040776491165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02244725450873375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02169649861752987, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014605500735342503, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.014538902789354324, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.014059512875974178, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009417328052222729, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.25081151723861694, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.23572170734405518, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.2309650331735611, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.20840901136398315, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11727309972047806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.11314748972654343, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.13105033338069916, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.12141722440719604, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11951436847448349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10716410726308823, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09859203547239304, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06648881733417511, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.057921670377254486, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05606135353446007, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05561599135398865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.0331888347864151, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.028426550328731537, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.028309907764196396, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02631569840013981, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.025406653061509132, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.017043601721525192, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.016616621986031532, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016408640891313553, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010109414346516132, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.9.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.24772757291793823, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.21936790645122528, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.2084932029247284, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.18578557670116425, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.11456771194934845, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10264185070991516, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.13495633006095886, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.12387184798717499, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.11808549612760544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09691403806209564, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09251657128334045, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.06885664165019989, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.05958966538310051, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.0553467832505703, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.05431434139609337, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03453018143773079, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.02924720197916031, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.028892192989587784, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.025828685611486435, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.02540089748799801, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.018690500408411026, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.019193727523088455, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.017332561314105988, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.013733048923313618, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.07594750821590424, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.07079090178012848, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.06831222027540207, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.06101130694150925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.03549918159842491, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.033690206706523895, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.0425216443836689, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.038178011775016785, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.03629160672426224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.03221242129802704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.029825162142515182, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.021697962656617165, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.01828300766646862, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.017076261341571808, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.016773344948887825, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.010852177627384663, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.008758218958973885, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.008641421794891357, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.008090244606137276, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.007719357497990131, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.005658513866364956, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.005379867274314165, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.005245950073003769, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.0034325066953897476, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.07934586703777313, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.07355834543704987, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.07045703381299973, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.06283741444349289, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.03647448122501373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.0343429334461689, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.04340245947241783, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.03995329141616821, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.03740527853369713, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.03293532133102417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.030228042975068092, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.02192668244242668, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.01907939650118351, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.017484748736023903, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.017111830413341522, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.01096582505851984, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.008874434046447277, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.008697436191141605, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.00813440140336752, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.007674640975892544, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.005647621583193541, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.005400830414146185, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.005104376003146172, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0032223572488874197, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2274046540260315, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.21217206120491028, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.20619258284568787, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.18500250577926636, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.10569222271442413, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.1010575070977211, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.12190153449773788, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.11113478988409042, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10810491442680359, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.09583243727684021, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.08841303735971451, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.062069520354270935, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.05304260551929474, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.05057856813073158, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04992420971393585, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.03105492703616619, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02542913146317005, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.025193504989147186, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02331864833831787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.022313710302114487, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.015778301283717155, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.014722349122166634, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.014598961919546127, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.00838935561478138, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.22618834674358368, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.2012525349855423, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.1926061064004898, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.16401755809783936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.10304200649261475, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.09582772850990295, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.12053526192903519, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.11093699187040329, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.10702833533287048, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.08816894143819809, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.0781964361667633, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.061786480247974396, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.05340961366891861, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.049823783338069916, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.04895716533064842, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.031016837805509567, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.026405232027173042, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.02616718038916588, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.023508526384830475, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.022286415100097656, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.016721146181225777, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.017382539808750153, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.015568200498819351, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.012632005847990513, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.21941214799880981, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.20622876286506653, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.20183435082435608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.18215474486351013, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.1027418002486229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09903375804424286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.11504077166318893, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10649090260267258, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.1046612486243248, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0938156396150589, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.0862342119216919, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05849700793623924, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05086471140384674, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04917651414871216, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04877287521958351, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02920563891530037, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.02511022984981537, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02498975396156311, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.023270029574632645, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.022476868703961372, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.015162235125899315, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01493599358946085, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.014584826305508614, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009473886340856552, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.257891982793808, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.24231451749801636, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.2373059093952179, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.21419109404087067, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.12069805711507797, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.11638795584440231, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.13501250743865967, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.12497380375862122, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.12290629744529724, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.11024843156337738, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.10141574591398239, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06860306113958359, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05962587893009186, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.057685017585754395, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05722804367542267, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.034213434904813766, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.029270052909851074, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.029146414250135422, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.027094049379229546, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.0261527132242918, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.017627447843551636, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01712585799396038, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016970006749033928, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010437116958200932, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.10.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.2601604759693146, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.2319779247045517, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.22099162638187408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.1975526660680771, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.1206764355301857, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10879054665565491, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.14203087985515594, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.13077712059020996, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.1245458796620369, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.10321487486362457, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09840177744626999, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.07254181802272797, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.0630333423614502, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.058419715613126755, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.057300206273794174, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.0365307554602623, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.030891790986061096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.03048243559896946, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.027477901428937912, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.026976002380251884, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.019740484654903412, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.020304765552282333, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01827375218272209, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014507748186588287, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.09714207053184509, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.0899362713098526, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.08659684658050537, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.07736173272132874, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.04505741223692894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.0424557663500309, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.05334886908531189, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.04852158576250076, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.04612945392727852, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.04051626846194267, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.03740822523832321, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.027078572660684586, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.02322407066822052, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.021662916988134384, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.021283350884914398, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.013541040942072868, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.011204845272004604, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.011050800792872906, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.010277978144586086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.009794855490326881, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.0070716687478125095, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007026610430330038, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.00650674756616354, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.0046541946940124035, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.08752786368131638, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.08047271519899368, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.07712038606405258, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.0684434026479721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.039956752210855484, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.03744705021381378, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.04736324027180672, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.043695997446775436, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.04101996123790741, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.035815250128507614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.03276648744940758, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.023898782208561897, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.020929589867591858, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.019173260778188705, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.018716702237725258, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.011940499767661095, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.009750649333000183, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.00956773292273283, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.008895198814570904, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.00840324629098177, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.006138388067483902, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.005963829346001148, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0055466461926698685, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0036466927267611027, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.21400341391563416, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.19951733946800232, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.19381539523601532, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.17376361787319183, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.09958993643522263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.09529878199100494, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.11482331156730652, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.10469532012939453, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10175499320030212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.0902307853102684, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.08332077413797379, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05864850431680679, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.050214797258377075, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.04768958315253258, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.047028400003910065, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.029242970049381256, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.024113371968269348, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.023879479616880417, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.022116657346487045, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.021162915974855423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.014903416857123375, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.014095749706029892, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.013848217204213142, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.008243519812822342, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.21030932664871216, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.1827549934387207, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.17246121168136597, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.14905479550361633, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.09516244381666183, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.08640088886022568, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.11477135121822357, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10526827722787857, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09986723214387894, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.08049213141202927, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.07312754541635513, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.058687642216682434, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.0504675917327404, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04588222876191139, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.04474565014243126, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.02930009551346302, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.023878835141658783, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.023518890142440796, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.020914023742079735, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.019546937197446823, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.015383636578917503, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.015375970862805843, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.013882763683795929, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.010308804921805859, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.20618146657943726, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1935914158821106, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.189411461353302, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1709030717611313, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09664662182331085, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09314233809709549, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.1083831712603569, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.1003170907497406, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09850180894136429, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08812033385038376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08111467957496643, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05513283610343933, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.047971755266189575, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04630187526345253, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04589870944619179, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.0275428406894207, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.023725062608718872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.023608149960637093, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.021980181336402893, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.021228540688753128, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014352587051689625, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01426320243626833, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013789443299174309, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009219221770763397, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2543342113494873, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.23896972835063934, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.23397314548492432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.2109319120645523, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11918216198682785, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.1149633452296257, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.133319690823555, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.12354380637407303, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.12146876752376556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10878945142030716, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09997983276844025, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06772828102111816, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05896453186869621, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05700744315981865, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05653801187872887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03382183238863945, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.028945371508598328, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02881276048719883, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.026771651580929756, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.025819433853030205, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.017391718924045563, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.016949595883488655, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.016723040491342545, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.01036082673817873, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.11.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.25729426741600037, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.22722722589969635, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.21530091762542725, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.1916355937719345, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.11949222534894943, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.1062382385134697, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.14126166701316833, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.129800483584404, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.12327711284160614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.1004781574010849, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09563954174518585, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.07206901907920837, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.06249844282865524, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.057747732847929, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.05660540610551834, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03623570129275322, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.0304891187697649, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.030067885294556618, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.026782898232340813, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.026281166821718216, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.019617607817053795, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.01999521814286709, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01810152269899845, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014200492762029171, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.09797467291355133, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.09111861884593964, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.0878826305270195, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.07870609313249588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.045612942427396774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.04317353293299675, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.054569125175476074, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.049227192997932434, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.04665123298764229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.041299425065517426, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.03855947032570839, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.02783195674419403, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.02356458455324173, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.021872907876968384, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.02146531641483307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.013890640810132027, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.011234845034778118, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.011061426252126694, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.010354631580412388, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.009842301718890667, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.007195387035608292, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.0069519695825874805, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.0064962804317474365, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.00444492744281888, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.07981718331575394, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.07412711530923843, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.07073196023702621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.06301175057888031, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.036753326654434204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.03450518846511841, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.043864086270332336, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.040734097361564636, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.03764430060982704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.0332254022359848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.0304951760917902, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.02219996228814125, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.01945500634610653, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.017645183950662613, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.01719522662460804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.011076844297349453, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.009009655565023422, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.008808910846710205, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.00828572828322649, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.007792672608047724, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.005738171748816967, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.0056123933754861355, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.005173675250262022, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.003455260070040822, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.24354565143585205, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.22726136445999146, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.22192247211933136, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.199044868350029, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.11358587443828583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.10872144252061844, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.13013772666454315, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.11890818178653717, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.11595804244279861, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.10304397344589233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.09514026343822479, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.06657490879297256, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.05681614577770233, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.05427674204111099, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.05367117375135422, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.03329882398247719, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.027352139353752136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.027110986411571503, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02512155845761299, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.02409498021006584, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.016915511339902878, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.01587795466184616, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.01573038101196289, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.009157134220004082, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.22693103551864624, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.2050100713968277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.19726374745368958, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.16832002997398376, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.10424910485744476, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.0978110283613205, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.12121983617544174, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.11115601658821106, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.10758372396230698, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.08984420448541641, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.07960092276334763, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.06208178400993347, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.053403012454509735, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.05021945387125015, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.04946131631731987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.031052997335791588, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.026251815259456635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.02602122165262699, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.023384153842926025, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.022095447406172752, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.016570210456848145, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.016798019409179688, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.01552888285368681, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.011677644215524197, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.1919708251953125, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.18016110360622406, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.17610131204128265, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.15859323740005493, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09003307670354843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.08668747544288635, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10110872983932495, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09365487098693848, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09179911017417908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08205221593379974, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07541421800851822, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05156079679727554, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.044902343302965164, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.043268486857414246, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04288138076663017, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.025787353515625, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.022462770342826843, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02235574647784233, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02085094526410103, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.020151326432824135, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.01360893901437521, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.013950030319392681, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013064551167190075, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009563306346535683, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.24526174366474152, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.23018048703670502, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.22536422312259674, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.2031143307685852, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11494418978691101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.11072824895381927, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12895724177360535, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11924965679645538, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11710578948259354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.1047697514295578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09629731625318527, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06551118195056915, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05693749338388443, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.054973382502794266, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.054488617926836014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03269215300679207, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027931027114391327, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.027802210301160812, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.025821903720498085, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.024897582828998566, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.016825931146740913, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.016428379341959953, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.01614464446902275, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010100945830345154, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.12.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.250014990568161, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.21854102611541748, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.20583899319171906, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.18203425407409668, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.11568833887577057, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10188372433185577, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.1382426768541336, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.12663236260414124, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.11958430707454681, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09604883193969727, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09134243428707123, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.0704866349697113, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.061010655015707016, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05600736662745476, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.054769232869148254, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.035622842609882355, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.0296586025506258, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.029210463166236877, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.02587655372917652, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.025309359654784203, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.01952735334634781, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.019686322659254074, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01797489821910858, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014119517989456654, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.09896036982536316, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.09228245913982391, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.08889539539813995, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.07969950884580612, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.046072814613580704, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.04359075427055359, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.05452567711472511, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.05002618953585625, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.04707023873925209, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.04182465374469757, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.03875000774860382, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.027737032622098923, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.023967040702700615, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.022112468257546425, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.0216507725417614, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.013870110735297203, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.011345821432769299, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.011149629019200802, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.01047654077410698, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.009938710369169712, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.007188837509602308, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007046305574476719, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.006545519921928644, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.004478106740862131, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.0877041220664978, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.08150696009397507, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.07835094630718231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.06966657191514969, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.04044589400291443, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.038092270493507385, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.04766617342829704, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.04410338029265404, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.041277073323726654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.03644696623086929, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.03349275514483452, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.024169163778424263, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.021088114008307457, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.019426755607128143, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.01901041530072689, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.012035535648465157, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.009910850785672665, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.009728443808853626, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.009107168763875961, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.008618921041488647, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.006204306613653898, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.006084084045141935, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.005655952263623476, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0037744748406112194, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.22813302278518677, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.2134549915790558, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.20751529932022095, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.1871664971113205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.1062895879149437, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.10181116312742233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.12216665595769882, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.11151334643363953, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10842063277959824, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.09656743705272675, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.08964311331510544, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.062412600964307785, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.05335996672511101, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.05087297409772873, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.05020616576075554, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.031175583600997925, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.025656910613179207, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.02544322982430458, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02363339625298977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.022656772285699844, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.015907147899270058, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.014874571934342384, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.014772551134228706, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.008518919348716736, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.2054424285888672, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.18286508321762085, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.17481720447540283, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.14972873032093048, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.09349493682384491, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.08698336035013199, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.10966400802135468, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10056787729263306, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09688445925712585, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.07986794412136078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.07173328101634979, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.0561131052672863, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.04826951026916504, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04507371783256531, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.04428614303469658, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.028155025094747543, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.023640375584363937, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.023415829986333847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.020957810804247856, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.01981949992477894, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.015155700966715813, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.015273969620466232, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.014131462201476097, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.010708830319344997, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.194964200258255, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.18305686116218567, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.17905637621879578, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.16153770685195923, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09131431579589844, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.08798374235630035, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10251462459564209, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09494469314813614, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09307209402322769, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0833725854754448, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07683976739645004, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05226485803723335, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04547903314232826, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04384982958436012, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.043468963354825974, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02612273022532463, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.022700270637869835, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02258816547691822, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02108735777437687, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02037467248737812, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.013776724226772785, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01398763433098793, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013241808861494064, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009473263286054134, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2400498390197754, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2255382537841797, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.22069504857063293, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.19920654594898224, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11243222653865814, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10834838449954987, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.1260533183813095, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11664131283760071, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11454637348651886, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10254278779029846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09444690495729446, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06406521052122116, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.0556936115026474, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05379250645637512, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05333224684000015, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.031975433230400085, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027337683364748955, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.027200322598218918, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.025295978412032127, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.024412915110588074, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.016507426276803017, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01606873795390129, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015840942040085793, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009889495559036732, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.13.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.2510981261730194, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.21960118412971497, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.2072419375181198, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.18302373588085175, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.11610579490661621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10255209356546402, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.1378859579563141, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.12663304805755615, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.12017938494682312, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09642770886421204, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09169315546751022, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.07070264965295792, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.06093153357505798, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.0560554601252079, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.05486096814274788, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03566639870405197, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.02944657951593399, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.029014604166150093, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.0255776084959507, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.025041040033102036, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.019441023468971252, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.019180314615368843, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01788947917521, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.013371146284043789, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.08955416828393936, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.08347910642623901, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.0803782269358635, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.07204863429069519, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.041696056723594666, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.039408646523952484, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.04918178170919418, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.04512324556708336, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.042610615491867065, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.03774920850992203, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.03498433902859688, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.02506794035434723, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.021629683673381805, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.02001556009054184, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.019618764519691467, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.012554454617202282, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.01036660186946392, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.010199508629739285, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.009586219675838947, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.009123324416577816, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.006578981876373291, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.006550611928105354, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.0060212076641619205, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.004341311287134886, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.08644422143697739, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.08022746443748474, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.07723872363567352, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.06886225193738937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.03967365249991417, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.03761039301753044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.04675190523266792, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.04314543306827545, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.04067697748541832, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.035928674042224884, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.03310186043381691, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.0235616322606802, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.020547619089484215, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.019022732973098755, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.018698181957006454, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.011765038594603539, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.009731938131153584, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.009572956711053848, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.008947480469942093, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.008496095426380634, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.006091277115046978, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.00595190841704607, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0055850837379693985, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.003680282738059759, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.21902519464492798, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.20422372221946716, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.19864927232265472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.17816729843616486, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.1014641597867012, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.09718497097492218, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.11669900268316269, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.10670887678861618, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10363519936800003, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.0919388011097908, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.08483298867940903, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.05951112508773804, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.05094170570373535, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.04856168106198311, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04798141121864319, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.02970290184020996, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02452707476913929, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.024323448538780212, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.022561050951480865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.0216051172465086, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.015186537057161331, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.014393015764653683, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.014112764969468117, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.008542793802917004, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.19817882776260376, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.17866508662700653, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.16731929779052734, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.1476152539253235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.08960877358913422, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.08191654086112976, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.11405983567237854, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10386326909065247, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09382537007331848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.07973616570234299, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.0736694484949112, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05835766717791557, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.04983160272240639, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04348629713058472, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.041854128241539, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.029261959716677666, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.02304312400519848, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.02239382453262806, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.02111539989709854, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.019601553678512573, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.015612425282597542, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.015735507011413574, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.013513064943253994, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.011027012020349503, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.1983337700366974, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.18641701340675354, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.18238049745559692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.16474059224128723, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09297453612089157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.08964568376541138, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10419955104589462, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.0964326336979866, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09468036144971848, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0849289670586586, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07835593819618225, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.053065985441207886, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.046207934617996216, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.044612329453229904, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04424638673663139, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.026520265266299248, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.023078760132193565, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.022968053817749023, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.021458376199007034, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02074364200234413, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.013931801542639732, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.014181737788021564, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013399630784988403, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.00959556084126234, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.23764261603355408, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.22360211610794067, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.2188420593738556, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.197833850979805, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11129233241081238, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10738717764616013, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12473432719707489, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11529413610696793, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11332383751869202, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10177157074213028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09380785375833511, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06331337988376617, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05505291372537613, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05321114882826805, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.052772026509046555, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.031609825789928436, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027022097259759903, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.026896724477410316, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02503853105008602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.024177653715014458, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.01627163216471672, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.015853149816393852, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015630682930350304, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009719274006783962, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.14.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.2615850865840912, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.2282927930355072, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.2150156944990158, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.18965262174606323, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.12099000811576843, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10653962939977646, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.14424937963485718, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.13265100121498108, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.1253167688846588, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.10041091591119766, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09547929465770721, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.07418540120124817, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.06399443000555038, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05868294835090637, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.05737739056348801, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03743370994925499, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.03115057945251465, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.030676184222102165, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.027150079607963562, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.026553062722086906, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.020459765568375587, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.02073693834245205, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.018758848309516907, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014923209324479103, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.10890667885541916, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.10149475932121277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.09828741103410721, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.0879938080906868, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.0505809411406517, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.048196833580732346, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.059251315891742706, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.053808677941560745, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.05171475186944008, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.04585029184818268, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.042726486921310425, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.030201822519302368, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.025756217539310455, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.024276090785861015, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.02392166666686535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.015101616270840168, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.012512614019215107, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.01238981168717146, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.011554134078323841, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.011064636521041393, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.007889076136052608, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007734909188002348, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.007394350133836269, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.005085740238428116, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.08760703355073929, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.08135426044464111, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.0782073512673378, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.06956257671117783, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.04014372080564499, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.03792721405625343, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.04723396524786949, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.04372909292578697, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.04107658192515373, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.03634271025657654, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.033270999789237976, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.023840686306357384, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.0208161398768425, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.01921229250729084, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.0188460536301136, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.011903220787644386, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.009857792407274246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.00967993400990963, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.009070364758372307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.008623995818197727, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.0061819893307983875, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.006085104774683714, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0056828525848686695, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.003844962455332279, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.22320501506328583, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.2077065259218216, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.2023901343345642, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.18089589476585388, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.10339720547199249, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.0987393707036972, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.11865466088056564, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.10900846123695374, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.10564975440502167, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.09383823722600937, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.08624570071697235, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.06035817041993141, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.05209174379706383, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.04948126897215843, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.04883509874343872, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.03018084354698658, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.024935059249401093, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.024701619520783424, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.022890513762831688, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.02186175249516964, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.015356572344899178, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.014548803679645061, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.014337855391204357, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.008344054222106934, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.1943100094795227, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.1714138686656952, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.1638273447751999, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.1421179622411728, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.08752193301916122, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.08149602264165878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.10387716442346573, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.09434641152620316, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09085307270288467, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.07625434547662735, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.06773252040147781, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.05287368595600128, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.04510347917675972, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04210056737065315, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.04138030856847763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.026445619761943817, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.021945403888821602, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.02175283618271351, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.019716981798410416, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.0187239870429039, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.014051297679543495, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.013997498899698257, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.013110113330185413, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.009599673561751842, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2044726461172104, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1921168714761734, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.18793803453445435, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1697796732187271, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09575822949409485, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09230794757604599, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10742579400539398, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09945429116487503, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09765209257602692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08754933625459671, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08066391944885254, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05467415973544121, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04758969694375992, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04594513028860092, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04554016888141632, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02732066437602043, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.023700831457972527, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.023587679490447044, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.022019624710083008, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.021278416737914085, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014326523058116436, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01447458378970623, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013780984096229076, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009660652838647366, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.24100296199321747, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.22670172154903412, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.221915602684021, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.2006435990333557, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11280632764101028, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10882232338190079, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12637117505073547, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11690416932106018, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11483556777238846, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.1030956357717514, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09505800902843475, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06414397060871124, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.055788543075323105, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.0539216473698616, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05347602441906929, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03202733024954796, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027412213385105133, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.027279742062091827, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02539866603910923, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02452448010444641, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.016492601484060287, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01609012670814991, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015845006331801414, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009885009378194809, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.15.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.25339797139167786, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.22167639434337616, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.20757003128528595, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.18277552723884583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.11733514815568924, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10283797979354858, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.14193710684776306, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.13049396872520447, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.12144117057323456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09744420647621155, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09269967675209045, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.07265681028366089, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.06288593262434006, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.056956030428409576, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.05551191791892052, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03657349944114685, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.030396558344364166, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.029819710180163383, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.026585238054394722, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.025888055562973022, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.019891129806637764, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.02054774761199951, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.018034208565950394, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014938939362764359, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.10822924226522446, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.10059483349323273, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.09709326922893524, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.08730972558259964, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.05039703845977783, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.047658469527959824, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.059522852301597595, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.0540459007024765, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.05145954713225365, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.04558468982577324, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04232179746031761, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.03025415726006031, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.02586681954562664, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.024200398474931717, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.023781754076480865, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.015132077038288116, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.012485052458941936, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.012320172972977161, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.011512983590364456, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.01099366880953312, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.00790650025010109, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007789588999003172, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.0073355077765882015, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.005132666323333979, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.09286292642354965, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.08634205162525177, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.08299683779478073, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.0739305391907692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.04266096651554108, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.04031527414917946, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.05072665959596634, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.046751633286476135, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.04374054819345474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.038644272834062576, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.035542842000722885, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.025483878329396248, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.02223750203847885, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.020521583035588264, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.020076025277376175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.012821969576179981, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.010553094558417797, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.010352442972362041, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.009715182706713676, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.009213386103510857, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.0066483537666499615, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.006562703289091587, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.006070367991924286, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.004217375535517931, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2442270666360855, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.22836712002754211, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.22253385186195374, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.19966521859169006, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.11383851617574692, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.10902433097362518, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.13024060428142548, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.11924270540475845, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.1159229576587677, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.10353944450616837, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.09505029767751694, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.06667327135801315, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.057092633098363876, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.05437330901622772, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.05372614413499832, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.03327108174562454, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02742878720164299, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.02721436507999897, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02527526021003723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.02419493906199932, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.016972485929727554, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.01597573608160019, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.015821948647499084, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.009287545457482338, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.1999250054359436, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.17933210730552673, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.17236338555812836, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.14832912385463715, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.0917031392455101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.08609487116336823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.10677428543567657, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.09774401783943176, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.0948079526424408, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.07915700972080231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.07048824429512024, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.054628416895866394, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.04681554064154625, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.044151343405246735, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.043460771441459656, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.02731623686850071, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.022949494421482086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.022770307958126068, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.02043898217380047, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.01939508132636547, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.01443649921566248, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.014482271857559681, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.013550742529332638, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.009911181405186653, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.19981473684310913, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1877259612083435, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.18361437320709229, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.16578513383865356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09361010789871216, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09021022915840149, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10485216975212097, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09716939181089401, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09537855535745621, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08543601632118225, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.0786757692694664, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.053382162004709244, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.046455156058073044, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.044856712222099304, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04446382075548172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.026653004810214043, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.02307705767452717, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.022966334596276283, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.021407928317785263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.020691338926553726, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.013912727124989033, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.013992633670568466, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.01337452419102192, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009212872013449669, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.235944002866745, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.22180815041065216, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.21715134382247925, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1961737722158432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11045753210783005, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10651304572820663, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.123642198741436, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11448211967945099, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11253960430622101, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10091295838356018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09284292906522751, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.0627850741147995, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.054667599499225616, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05284509435296059, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05238449573516846, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03133920580148697, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.026843352243304253, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02672029472887516, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.024850018322467804, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.023977918550372124, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.0161124374717474, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.015752088278532028, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015484253875911236, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009659082628786564, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.16.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.25168225169181824, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.21974119544029236, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.2066745162010193, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.18218235671520233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.11656301468610764, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10245989263057709, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.13961520791053772, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.1278771013021469, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.12072907388210297, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.0965392142534256, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09183561056852341, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.07148214429616928, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.061679452657699585, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05653209239244461, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.055276885628700256, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03608394414186478, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.03008347749710083, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.029628759250044823, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.026226023212075233, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.025651566684246063, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.01970449648797512, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.020119845867156982, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.018042558804154396, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014590544626116753, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.10323455184698105, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.09614597260951996, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.09292672574520111, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.08347738534212112, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.04806825891137123, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.04555603489279747, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.05683369189500809, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.05155910551548004, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.04909774288535118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.043547213077545166, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.040455713868141174, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.028824402019381523, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.024661948904395103, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.02309466153383255, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.02272476814687252, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.014394158497452736, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.011889693327248096, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.011733705177903175, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.010962671600282192, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.010470626875758171, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.007525456137955189, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007369620725512505, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.0069959089159965515, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.004806107841432095, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.09214146435260773, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.08603856712579727, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.0823829397559166, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.07379361987113953, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.04253625124692917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.040140580385923386, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.05045169219374657, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.04661019146442413, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.04348728060722351, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.03862827271223068, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.03559477627277374, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.025491653010249138, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.022210003808140755, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.020438887178897858, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.019984034821391106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.012736033648252487, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.01042227167636156, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.010221335105597973, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.00961008109152317, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.009085375815629959, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.006603911519050598, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.006398091092705727, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.006020551081746817, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.003925190772861242, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.26388123631477356, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.2458975911140442, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.2390086054801941, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.21401089429855347, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.12308905273675919, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.11731402575969696, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.14059947431087494, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.12913081049919128, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.12541893124580383, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.1113034263253212, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.1022542342543602, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.0718047171831131, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.06188633292913437, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.058843497186899185, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.05814780294895172, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.036006707698106766, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.02987562119960785, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.029598355293273926, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.027424871921539307, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.026239311322569847, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.018426168709993362, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.017662236467003822, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.017165163531899452, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.010672338306903839, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.2459607571363449, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.2267545759677887, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.22041763365268707, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.19364818930625916, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.11371995508670807, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.10816104710102081, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.1306772232055664, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.11947256326675415, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.11660513281822205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.10119254887104034, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.09186974912881851, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.06706510484218597, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.05726785585284233, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.05460602790117264, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.053988486528396606, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.03351868316531181, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.028232775628566742, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.02803698554635048, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.02568143606185913, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.02455081231892109, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.017629655078053474, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.01746620237827301, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.01674572564661503, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.011689658276736736, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2055162787437439, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.19295454025268555, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.18878836929798126, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.17023184895515442, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.0962972417473793, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09282128512859344, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10789647698402405, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09999464452266693, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09814979135990143, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0878237932920456, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08096134662628174, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05489988625049591, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04782678559422493, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.046207353472709656, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04579300060868263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02740965597331524, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.023733625188469887, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.023618195205926895, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.022006137296557426, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02126542292535305, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014299406670033932, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.014366436749696732, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013747259974479675, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009420826099812984, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.23796504735946655, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2236231416463852, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.21888066828250885, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1974191814661026, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.11140873283147812, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10734745115041733, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12473443150520325, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.115511454641819, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.11348040401935577, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.10172934830188751, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09358196705579758, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.0633312538266182, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.055116813629865646, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05326232314109802, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05282840132713318, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03160829097032547, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.027040230110287666, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02691192738711834, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02502099983394146, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.024152180179953575, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.0162388626486063, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.015866603702306747, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.015588713809847832, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009705601260066032, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.17.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.26269277930259705, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.23044294118881226, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.21775226294994354, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.19179731607437134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.121578648686409, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10775730013847351, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.14481636881828308, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.1326155811548233, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.1259133368730545, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.10124481469392776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.09599152207374573, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.07391276955604553, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.06381690502166748, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05883260816335678, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.057607587426900864, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03711599111557007, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.030969232320785522, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.030537350103259087, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.02697579376399517, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.02642734721302986, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.019979404285550117, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.020254027098417282, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01835278421640396, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.014246649108827114, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.11736690253019333, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.10917483270168304, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.10549276322126389, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.0946229100227356, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.05460654944181442, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.051861461251974106, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.06417921930551529, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.05860493704676628, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.05588499456644058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.04946497455239296, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04586106166243553, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.03262199088931084, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.02803787589073181, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.02623177133500576, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.02578507736325264, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.01634353958070278, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.013502025976777077, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.013332119211554527, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.012463856488466263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.011890250258147717, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.008539151400327682, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.008368495851755142, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.007900129072368145, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.005426785908639431, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.09787007421255112, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.09116828441619873, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.08735904842615128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.07814030349254608, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.045309942215681076, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.0425369031727314, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.05364704877138138, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.04955984652042389, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.046150773763656616, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.04096752405166626, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.037694185972213745, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.02710822783410549, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.023731647059321404, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.021686071529984474, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.021196361631155014, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.013544146902859211, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.011114140041172504, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.010876869782805443, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.01024420466274023, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.009672969579696655, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.007045038975775242, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.006918408442288637, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0064217871986329556, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.004346799571067095, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2503192722797394, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.2320861518383026, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.22518686950206757, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.20090511441230774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.11593971401453018, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.11049717664718628, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.13307371735572815, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.12226779013872147, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.11861562728881836, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.10440407693386078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.09605646133422852, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.06787654012441635, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.058458615094423294, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.0554332435131073, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.05474475398659706, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.03393416106700897, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.028147414326667786, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.027874644845724106, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.025729995220899582, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.024606479331851006, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.017354687675833702, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.016642093658447266, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.016159337013959885, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.009918580763041973, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.17084570229053497, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.15062806010246277, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.14428479969501495, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.12332943081855774, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.07788542658090591, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.0720711350440979, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.09111173450946808, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.08307477086782455, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.08079110831022263, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.06531613320112228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.059640005230903625, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.04653681814670563, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.03985645994544029, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.0374995656311512, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.03692398965358734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.023329343646764755, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.019646449014544487, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.019490692764520645, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.017182601615786552, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.016263967379927635, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.012517282739281654, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.012585146352648735, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.011763520538806915, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.008806225843727589, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.20379741489887238, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1914636194705963, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.18730969727039337, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1691495031118393, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09553074091672897, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09203460067510605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10691617429256439, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09909786283969879, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09736144542694092, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08718618750572205, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08032502979040146, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05450870469212532, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04743090644478798, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.045815303921699524, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.045423321425914764, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.027204303070902824, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.0235799178481102, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.023464087396860123, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.021877920255064964, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.021137703210115433, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014233383350074291, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.014324466697871685, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013693789020180702, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009451783262193203, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.23085734248161316, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.2170666754245758, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.21243822574615479, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.19175592064857483, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.10814829170703888, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.10423750430345535, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.12090165913105011, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.11200838536024094, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.1101686954498291, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0987384244799614, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.09091567993164062, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.06143694370985031, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05349542573094368, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.05171268805861473, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05128282681107521, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.03064613975584507, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.026260897517204285, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.026136618107557297, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.024294311180710793, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.023449769243597984, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.015766749158501625, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.015408855862915516, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.01516028679907322, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009443101473152637, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.18.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.2456955462694168, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.21512866020202637, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.2026519477367401, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.17808261513710022, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.1136489287018776, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.10019978880882263, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.13570651412010193, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.12485744804143906, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.11763780564069748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.09440860897302628, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.08967175334692001, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.06962679326534271, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.0601961724460125, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.05499005317687988, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.053709544241428375, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03503815457224846, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.029019370675086975, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.028536120429635048, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.025270985439419746, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.02467251941561699, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.018875395879149437, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.019169360399246216, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01718616858124733, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.013536527752876282, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.11614827811717987, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.1082979142665863, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.10470934212207794, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.094061940908432, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.05426718667149544, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.05145934224128723, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.06385574489831924, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.05801210179924965, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.05540022999048233, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.049145136028528214, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04560882970690727, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.032429810613393784, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.02777821198105812, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.02610420249402523, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.025699323043227196, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.016249768435955048, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.013452080078423023, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.013279465027153492, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.012400222942233086, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.011847807094454765, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.008539984002709389, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.00832061842083931, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.007968463003635406, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.005421579349786043, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.09885676205158234, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.09208865463733673, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.0882568433880806, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.07880894094705582, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.04560166224837303, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.043200455605983734, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.05392823368310928, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.049965210258960724, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.04669644683599472, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.04127071797847748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.03805288299918175, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.02721519023180008, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.023751962929964066, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.0219329372048378, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.021505970507860184, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.013613506220281124, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.011222345754504204, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.01102576032280922, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.010321643203496933, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.009765313006937504, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.007063053548336029, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.006917238235473633, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.006483436096459627, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.00431250361725688, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2617536783218384, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.24406984448432922, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.2385285198688507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.21364331245422363, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.12203836441040039, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.11693629622459412, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.1390274614095688, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.127631276845932, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.12441415339708328, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.11052580177783966, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.10164525359869003, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.07080923020839691, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.061102282255887985, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.0582919605076313, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.05761849880218506, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.03527965769171715, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.029380422085523605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.029155077412724495, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02693808265030384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.025874348357319832, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.018039558082818985, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.017132889479398727, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.016922691836953163, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.009973965585231781, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.20656073093414307, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.1898573637008667, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.18447396159172058, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.1617172360420227, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.09577317535877228, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.09111638367176056, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.10919947177171707, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.10020292550325394, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09816282987594604, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.08448508381843567, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.0763353556394577, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.055948320776224136, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.04805108532309532, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04606778174638748, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.045620448887348175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.027965659275650978, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.023843130096793175, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.023720234632492065, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.02153899520635605, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.02064763754606247, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.014723478816449642, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.014744321815669537, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.014062694273889065, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.010002201423048973, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.2067919373512268, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1943773627281189, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.19024129211902618, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.17165856063365936, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09686456620693207, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09333277493715286, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10845744609832764, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10050606727600098, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09870321303606033, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.0884573832154274, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.0814208984375, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.055172618478536606, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.048078667372465134, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04642802104353905, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04602411016821861, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.027549099177122116, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.02381906658411026, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.023700546473264694, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02208547107875347, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.021333565935492516, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014316916465759277, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01434901263564825, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.01376425102353096, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009319051168859005, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.22568614780902863, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.21209336817264557, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.20764219760894775, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.18748635053634644, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.10557568818330765, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.1017976701259613, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.11812784522771835, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10943726450204849, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.10760167241096497, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.09642939269542694, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08875713497400284, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05993907153606415, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.05224989354610443, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.050515539944171906, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.05009999871253967, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.0299418643116951, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.025637568905949593, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.025532471016049385, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.02373948134481907, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.022912295535206795, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.015402838587760925, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.015054605901241302, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.014809410087764263, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009242539294064045, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.19.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.2288987934589386, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.20011140406131744, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.18822622299194336, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.16508151590824127, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.10593539476394653, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.09318536520004272, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.12653154134750366, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.11641073226928711, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.10962026566267014, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.08762529492378235, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.0828329548239708, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.06483517587184906, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.05600268766283989, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.051206354051828384, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.05002524331212044, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03260223567485809, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.026923537254333496, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.026483170688152313, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.02333533763885498, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.022789299488067627, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.017552148550748825, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.01766294613480568, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.01603006199002266, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.012333840131759644, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.10776302218437195, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.10108979791402817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.09791313856840134, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.08814901113510132, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.05046606436371803, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.04809940233826637, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.05949762836098671, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.053956955671310425, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.051467154175043106, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.04602944478392601, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.042593829333782196, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.030371369794011116, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.02583097107708454, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.024219172075390816, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.023828871548175812, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.015206257812678814, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.012411984615027905, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.012253832072019577, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.011500491760671139, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.010998218320310116, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.007940834388136864, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.007601914927363396, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.007346709258854389, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.0048462883569300175, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.09891804307699203, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.09221358597278595, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.08895709365606308, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.07961475104093552, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.046000123023986816, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.04363855719566345, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.05380239710211754, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.049790434539318085, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.04689899832010269, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.04160308092832565, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.0383591428399086, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.02717534266412258, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.02373668923974037, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.022050710394978523, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.021608080714941025, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.0136032709851861, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.011226614937186241, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.01104316022247076, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.010339139960706234, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.009819867089390755, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.007002705708146095, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.006824665702879429, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.0064613730646669865, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.00416063005104661, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2521402835845947, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.23666736483573914, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.231153666973114, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.2076246738433838, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.1177678108215332, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.11307177692651749, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.1337173879146576, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.12225267291069031, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.11993122100830078, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.1069926843047142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.0986027717590332, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.06803043931722641, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.058461423963308334, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.056221190840005875, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.055643532425165176, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.033913880586624146, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.028364934027194977, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.028181662783026695, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.026062803342938423, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.025021804496645927, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.017253028228878975, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.016370652243494987, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.016244055703282356, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.0094605078920722, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.1989995539188385, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.18090754747390747, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.17473790049552917, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.150717630982399, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.09167253971099854, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.08650883287191391, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.10678914934396744, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.09719190746545792, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.09458445012569427, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.07976939529180527, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.07119397819042206, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.054938361048698425, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.04678330197930336, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.04423573985695839, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.043604526668787, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.02752593532204628, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.02307332120835781, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.022907182574272156, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.020690225064754486, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.0196751207113266, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.0145817706361413, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.014648850075900555, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.013719387352466583, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.010144567117094994, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.20937363803386688, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.1969214677810669, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.1926461011171341, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.17398548126220703, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.09823892265558243, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.0946551039814949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10994613915681839, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10188797861337662, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.100075863301754, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08967576175928116, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08264772593975067, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05596836283802986, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04871552437543869, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04705333337187767, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04664495959877968, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.027954645454883575, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.024119144305586815, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.024002863094210625, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.022376587614417076, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02161126770079136, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014562864787876606, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.014496606774628162, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.01400376483798027, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009386424906551838, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.21557657420635223, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.20282329618930817, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.19858242571353912, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1794300675392151, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.10099372267723083, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.09746000170707703, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.11292848736047745, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.10467316955327988, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.10290729254484177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.09226102381944656, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.08507052063941956, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05740997940301895, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.04999148100614548, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.048323702067136765, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.047931917011737823, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.028665296733379364, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.024594344198703766, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.02447580173611641, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.022783271968364716, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.021996162831783295, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014789547771215439, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.01450846903026104, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.014220085926353931, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.009024165570735931, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.20.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.21833863854408264, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.19124160706996918, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.1802063286304474, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.15730354189872742, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.10121498256921768, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.08914308249950409, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.12148858606815338, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.11076556146144867, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.10460386425256729, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.08361902087926865, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.07897846400737762, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.062268778681755066, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.05346469581127167, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.04901545122265816, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.047927748411893845, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.03142966330051422, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.025959433987736702, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.025558074936270714, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.02254643850028515, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.022061889991164207, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.017163628712296486, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.01721136085689068, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.015716828405857086, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.012318450026214123, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.q_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.11741141974925995, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.10940573364496231, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.10571890324354172, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.09517442435026169, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.05491344630718231, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.05213942378759384, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.06454464048147202, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.05894427001476288, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.05597537383437157, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.0496358722448349, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04583267122507095, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.032697875052690506, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.028184421360492706, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.026369646191596985, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.025921380147337914, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.016370626166462898, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.01358207780867815, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.013385509140789509, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.01251156534999609, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.011924517340958118, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.008562043309211731, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.008417457342147827, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.007939680479466915, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.005467805080115795, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.k_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.1100945770740509, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.10270747542381287, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.0992354303598404, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.0892423614859581, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.0512680746614933, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.04863245412707329, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.059630632400512695, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.05497615411877632, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.05232460796833038, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.046385347843170166, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.042687010020017624, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.03010900504887104, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.02631673589348793, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.02455027587711811, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.02411634661257267, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.015039152465760708, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.012471008114516735, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.012310486286878586, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.01148874219506979, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.010942574590444565, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.007761198561638594, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.007514536380767822, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.007210602983832359, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.004586981143802404, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.v_proj", - "numel": 524288, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.193359375, - "total_bits": 1149952.0, - "err": 0.2523910105228424, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.380859375, - "total_bits": 1248256.0, - "err": 0.23597654700279236, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.630859375, - "total_bits": 1379328.0, - "err": 0.23045125603675842, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.755859375, - "total_bits": 1444864.0, - "err": 0.20691964030265808, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.255859375, - "total_bits": 1707008.0, - "err": 0.11786631494760513, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.693359375, - "total_bits": 1936384.0, - "err": 0.11314301937818527, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03271484375, - "total_bits": 1590016.0, - "err": 0.13362297415733337, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.130859375, - "total_bits": 1641472.0, - "err": 0.12276648730039597, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.193359375, - "total_bits": 1674240.0, - "err": 0.12006386369466782, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.505859375, - "total_bits": 1838080.0, - "err": 0.10699000209569931, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6904296875, - "total_bits": 1934848.0, - "err": 0.09831508994102478, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03271484375, - "total_bits": 2114304.0, - "err": 0.0676891878247261, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.130859375, - "total_bits": 2165760.0, - "err": 0.058680322021245956, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.255859375, - "total_bits": 2231296.0, - "err": 0.056341078132390976, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.380859375, - "total_bits": 2296832.0, - "err": 0.05587203428149223, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03271484375, - "total_bits": 2638592.0, - "err": 0.033914871513843536, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.255859375, - "total_bits": 2755584.0, - "err": 0.0284267645329237, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.380859375, - "total_bits": 2821120.0, - "err": 0.02829815074801445, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.505859375, - "total_bits": 2886656.0, - "err": 0.02614424005150795, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.818359375, - "total_bits": 3050496.0, - "err": 0.025149697437882423, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03271484375, - "total_bits": 3162880.0, - "err": 0.01731668785214424, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.130859375, - "total_bits": 3214336.0, - "err": 0.01649055816233158, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28271484375, - "total_bits": 3293952.0, - "err": 0.016387518495321274, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.130859375, - "total_bits": 4262912.0, - "err": 0.009677309542894363, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.self_attn.o_proj", - "numel": 4194304, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.188232421875, - "total_bits": 9178112.0, - "err": 0.1304466426372528, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 9964544.0, - "err": 0.11804970353841782, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 11013120.0, - "err": 0.11405201256275177, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750732421875, - "total_bits": 11537408.0, - "err": 0.09846694767475128, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250732421875, - "total_bits": 13634560.0, - "err": 0.05985678359866142, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.688232421875, - "total_bits": 15469568.0, - "err": 0.05645013600587845, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 12714752.0, - "err": 0.06903661042451859, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 13110272.0, - "err": 0.06317273527383804, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.188232421875, - "total_bits": 13372416.0, - "err": 0.061628758907318115, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500732421875, - "total_bits": 14683136.0, - "err": 0.05188719183206558, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6878662109375, - "total_bits": 15468032.0, - "err": 0.04666375741362572, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 16909056.0, - "err": 0.03536827489733696, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 17304576.0, - "err": 0.030409418046474457, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.250732421875, - "total_bits": 17828864.0, - "err": 0.02890334650874138, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.375732421875, - "total_bits": 18353152.0, - "err": 0.028546340763568878, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 21103360.0, - "err": 0.017726987600326538, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.250732421875, - "total_bits": 22023168.0, - "err": 0.015261664986610413, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.375732421875, - "total_bits": 22547456.0, - "err": 0.015164200216531754, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.500732421875, - "total_bits": 23071744.0, - "err": 0.01369218248873949, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.813232421875, - "total_bits": 24382464.0, - "err": 0.01306718960404396, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 25297664.0, - "err": 0.00952488649636507, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 25693184.0, - "err": 0.009891442023217678, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.28143310546875, - "total_bits": 26346240.0, - "err": 0.009029648266732693, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 34081792.0, - "err": 0.007139615248888731, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.mlp.gate_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.19050028920173645, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.17915838956832886, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.17523986101150513, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.15830042958259583, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.08969065546989441, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.08640365302562714, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.10073425620794296, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.09317202121019363, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.09135386347770691, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.08189153671264648, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.07563848793506622, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.05171505734324455, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.044856611639261246, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.04327099397778511, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.04288964718580246, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.02593359164893627, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.022772589698433876, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.022656889632344246, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.021229060366749763, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.02056003548204899, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.014068664982914925, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.014534777030348778, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.013566520065069199, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.010475130751729012, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.mlp.up_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.187766335227273, - "total_bits": 25234432.000000004, - "err": 0.15811914205551147, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375266335227273, - "total_bits": 27397120.000000004, - "err": 0.14870020747184753, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625266335227273, - "total_bits": 30280704.000000004, - "err": 0.14538605511188507, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.750266335227273, - "total_bits": 31722496.000000004, - "err": 0.1313442438840866, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.250266335227273, - "total_bits": 37489664.0, - "err": 0.07422035187482834, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.687766335227273, - "total_bits": 42535936.0, - "err": 0.07146260142326355, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.0313165838068183, - "total_bits": 34964224.0, - "err": 0.08344143629074097, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125266335227273, - "total_bits": 36047872.0, - "err": 0.07710859924554825, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.187766335227273, - "total_bits": 36768768.0, - "err": 0.07566618919372559, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.500266335227273, - "total_bits": 40373248.0, - "err": 0.06774032860994339, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.6876331676136362, - "total_bits": 42534400.0, - "err": 0.062452081590890884, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.031316583806818, - "total_bits": 46498560.0, - "err": 0.04262218624353409, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.1252663352272725, - "total_bits": 47582208.0, - "err": 0.036947134882211685, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.2502663352272725, - "total_bits": 49024000.0, - "err": 0.03562276065349579, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.3752663352272725, - "total_bits": 50465792.0, - "err": 0.03529711067676544, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.031316583806818, - "total_bits": 58032896.0, - "err": 0.021318817511200905, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.2502663352272725, - "total_bits": 60558336.0, - "err": 0.018341805785894394, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.3752663352272725, - "total_bits": 62000128.0, - "err": 0.01825745217502117, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.5002663352272725, - "total_bits": 63441920.0, - "err": 0.017023343592882156, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.8127663352272725, - "total_bits": 67046400.0, - "err": 0.016445962712168694, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.031316583806818, - "total_bits": 69567232.0, - "err": 0.011275176890194416, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.1252663352272725, - "total_bits": 70650880.0, - "err": 0.011166435666382313, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.281316583806818, - "total_bits": 72450816.0, - "err": 0.01083346176892519, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125266335227273, - "total_bits": 93719552.0, - "err": 0.0073922425508499146, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - }, - { - "key": "model.layers.21.mlp.down_proj", - "numel": 11534336, - "options": [ - { - "desc": "0.05:3b/0.95:2b 32g s4", - "bpw": 2.1711869673295454, - "total_bits": 25043200.0, - "err": 0.13385267555713654, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:3b/0.75:2b 32g s4", - "bpw": 2.375732421875, - "total_bits": 27402496.0, - "err": 0.1174042820930481, - "qparams": { - "group_size": 32, - "bits": [ - 3, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.25:4b/0.75:2b 32g s4", - "bpw": 2.625732421875, - "total_bits": 30286080.0, - "err": 0.11029203981161118, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 2 - ], - "bits_prop": [ - 0.25, - 0.75 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", - "bpw": 2.716641512784091, - "total_bits": 31334656.0, - "err": 0.09698320925235748, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3, - 2 - ], - "bits_prop": [ - 0.1, - 0.4, - 0.5 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:4b/0.9:3b 32g s4", - "bpw": 3.216641512784091, - "total_bits": 37101824.0, - "err": 0.06181320175528526, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.2:6b/0.8:3b 32g s4", - "bpw": 3.7393687855113638, - "total_bits": 43131136.0, - "err": 0.05462951213121414, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 3 - ], - "bits_prop": [ - 0.2, - 0.8 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 128g s4", - "bpw": 3.03143310546875, - "total_bits": 34965568.0, - "err": 0.0779661238193512, - "qparams": { - "group_size": 128, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:3b 32g s4", - "bpw": 3.125732421875, - "total_bits": 36053248.0, - "err": 0.0685676857829094, - "qparams": { - "group_size": 32, - "bits": [ - 3 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:4b/0.95:3b 32g s4", - "bpw": 3.1711869673295454, - "total_bits": 36577536.0, - "err": 0.06395049393177032, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.05, - 0.95 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:4b/0.6:3b 32g s4", - "bpw": 3.534823330965909, - "total_bits": 40771840.0, - "err": 0.05188402906060219, - "qparams": { - "group_size": 32, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.6:4b/0.4:3b 64g s4", - "bpw": 3.653775301846591, - "total_bits": 42143872.0, - "err": 0.049526724964380264, - "qparams": { - "group_size": 64, - "bits": [ - 4, - 3 - ], - "bits_prop": [ - 0.6, - 0.4 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 128g s4", - "bpw": 4.03143310546875, - "total_bits": 46499904.0, - "err": 0.039513472467660904, - "qparams": { - "group_size": 128, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:4b 32g s4", - "bpw": 4.125732421875, - "total_bits": 47587584.0, - "err": 0.033443622291088104, - "qparams": { - "group_size": 32, - "bits": [ - 4 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:5b/0.9:4b 32g s4", - "bpw": 4.216641512784091, - "total_bits": 48636160.0, - "err": 0.030381685122847557, - "qparams": { - "group_size": 32, - "bits": [ - 5, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:4b 32g s4", - "bpw": 4.307550603693182, - "total_bits": 49684736.0, - "err": 0.029613161459565163, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 4 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:5b 128g s4", - "bpw": 5.03143310546875, - "total_bits": 58034240.0, - "err": 0.020163629204034805, - "qparams": { - "group_size": 128, - "bits": [ - 5 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:6b/0.9:5b 32g s4", - "bpw": 5.216641512784091, - "total_bits": 60170496.0, - "err": 0.0167301744222641, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.05:8b/0.05:6b/0.9:5b 32g s4", - "bpw": 5.307550603693182, - "total_bits": 61219072.0, - "err": 0.016454622149467468, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.05, - 0.05, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.4:6b/0.6:5b 32g s4", - "bpw": 5.534823330965909, - "total_bits": 63840512.0, - "err": 0.014893893152475357, - "qparams": { - "group_size": 32, - "bits": [ - 6, - 5 - ], - "bits_prop": [ - 0.4, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.3:6b/0.6:5b 32g s4", - "bpw": 5.693914240056818, - "total_bits": 65675520.0, - "err": 0.014549952000379562, - "qparams": { - "group_size": 32, - "bits": [ - 8, - 6, - 5 - ], - "bits_prop": [ - 0.1, - 0.3, - 0.6 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 128g s4", - "bpw": 6.03143310546875, - "total_bits": 69568576.0, - "err": 0.011276149190962315, - "qparams": { - "group_size": 128, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:6b 32g s4", - "bpw": 6.125732421875, - "total_bits": 70656256.0, - "err": 0.011875870637595654, - "qparams": { - "group_size": 32, - "bits": [ - 6 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - }, - { - "desc": "0.1:8b/0.9:6b 128g s4", - "bpw": 6.213251287286932, - "total_bits": 71665728.0, - "err": 0.010211360640823841, - "qparams": { - "group_size": 128, - "bits": [ - 8, - 6 - ], - "bits_prop": [ - 0.1, - 0.9 - ], - "scale_bits": 4 - } - }, - { - "desc": "1.0:8b 32g s4", - "bpw": 8.125732421875, - "total_bits": 93724928.0, - "err": 0.009274039417505264, - "qparams": { - "group_size": 32, - "bits": [ - 8 - ], - "bits_prop": [ - 1.0 - ], - "scale_bits": 4 - } - } - ] - } - ], - "last_module_idx": 46, - "base_perplexity": 16.02159451514135 + "measurement": { + "model.layers.0.self_attn": [ + { + "accuracy": 0.9504409432411194, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9537268280982971, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9635463953018188, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679550528526306, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718621373176575, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734413027763367, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9729241132736206, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9745633602142334, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776771068572998, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9781818389892578, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9845654368400574, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9866740703582764, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849143624305725, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867869019508362, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878080487251282, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927870631217957, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9879250526428223, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939776659011841, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903014302253723, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.0.mlp": [ + { + "accuracy": 0.9137428402900696, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9167191982269287, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9327775835990906, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9384987950325012, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9578671455383301, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9611591696739197, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9683015942573547, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9765557646751404, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788461327552795, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978515088558197, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810039401054382, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888549447059631, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892873167991638, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9928166270256042, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936298131942749, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946998953819275, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957722425460815, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.1.self_attn": [ + { + "accuracy": 0.89671391248703, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9097907543182373, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9201149940490723, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.946587085723877, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.949317991733551, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9518420696258545, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9603793621063232, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.963226318359375, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9681024551391602, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9697380661964417, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9750745296478271, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775166511535645, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773923754692078, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979856014251709, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854856133460999, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883766770362854, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871637225151062, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910922050476074, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993614137172699, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.1.mlp": [ + { + "accuracy": 0.8577123880386353, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8617769479751587, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8874801397323608, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.895362913608551, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9290304780006409, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9346158504486084, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9453338980674744, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9628351926803589, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9664553999900818, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638265371322632, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684175848960876, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981627345085144, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831932783126831, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892428517341614, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899433255195618, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916393160820007, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943563342094421, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.self_attn": [ + { + "accuracy": 0.8971129059791565, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9082987904548645, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9170451164245605, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9475849866867065, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.950081467628479, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9524359703063965, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9665535688400269, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9696725010871887, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724518060684204, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974159300327301, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9756543040275574, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773944616317749, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788696765899658, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803017377853394, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9873833060264587, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884156584739685, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901586174964905, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910740852355957, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944394826889038, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.2.mlp": [ + { + "accuracy": 0.8253173232078552, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8439314961433411, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8654630184173584, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8662594556808472, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9528393149375916, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9686983227729797, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715396761894226, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9774160385131836, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9788769483566284, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824426770210266, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857079982757568, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898361563682556, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9897869229316711, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9911811351776123, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921762943267822, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924684762954712, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931792616844177, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.self_attn": [ + { + "accuracy": 0.9678996801376343, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684207439422607, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9764547348022461, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824955463409424, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844886660575867, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9853679537773132, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989806056022644, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903212785720825, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913548231124878, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916152954101562, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9916980266571045, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921417236328125, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9939634203910828, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942553043365479, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965537786483765, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968793988227844, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972424507141113, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9979718327522278, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9988452196121216, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.3.mlp": [ + { + "accuracy": 0.9371699094772339, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9390871524810791, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9488091468811035, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9519625306129456, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682320952415466, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9706849455833435, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749845266342163, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983243465423584, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848823547363281, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9837727546691895, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858242869377136, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917846918106079, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925159215927124, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9952250719070435, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955382347106934, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962323307991028, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9975662231445312, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.self_attn": [ + { + "accuracy": 0.9407169222831726, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9476116299629211, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9631152153015137, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699748754501343, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733622074127197, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742394089698792, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820936918258667, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838315844535828, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9860427975654602, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9864315986633301, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9855299592018127, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9875594973564148, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905156493186951, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909719228744507, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946304559707642, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9948428273200989, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953743815422058, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968283176422119, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9982096552848816, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.4.mlp": [ + { + "accuracy": 0.9237771034240723, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9260733723640442, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9377062320709229, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9413989186286926, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9613701105117798, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9643630981445312, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694887399673462, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797216653823853, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816733002662659, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9802861213684082, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.982772707939148, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900387525558472, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9909219741821289, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942711591720581, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994611918926239, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9954437017440796, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9971277117729187, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.self_attn": [ + { + "accuracy": 0.9295307397842407, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.927514910697937, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9535272717475891, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9609613418579102, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9646806716918945, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.961962878704071, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9787601232528687, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816331267356873, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819358587265015, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824539422988892, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835801720619202, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9858303070068359, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9884825348854065, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895063042640686, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933742880821228, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936744570732117, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942531585693359, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9958630204200745, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976080656051636, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.5.mlp": [ + { + "accuracy": 0.9107840657234192, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9135586619377136, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9269999265670776, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9312992691993713, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9548637270927429, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9582927823066711, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9642459154129028, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762223958969116, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785003066062927, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9769856929779053, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9798451662063599, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9883550405502319, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989416241645813, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932880997657776, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9937666058540344, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994745135307312, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967634081840515, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.self_attn": [ + { + "accuracy": 0.929566502571106, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.934005618095398, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503582715988159, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9593666791915894, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9660553336143494, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9665283560752869, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.979071319103241, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796192646026611, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9815175533294678, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9823297262191772, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9821459054946899, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829924702644348, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9871101975440979, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877837896347046, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99301677942276, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9934794306755066, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9942551255226135, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9955906867980957, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.997452437877655, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.6.mlp": [ + { + "accuracy": 0.9011241793632507, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9042137861251831, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9196240901947021, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.924598753452301, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9498307704925537, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9537641406059265, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9604710340499878, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734518527984619, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760445356369019, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9743806719779968, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9776084423065186, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9870222806930542, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881497621536255, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924420118331909, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929544925689697, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940603971481323, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961728453636169, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.self_attn": [ + { + "accuracy": 0.8939898610115051, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.898218035697937, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9256796836853027, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9358251094818115, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9466401934623718, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.952196478843689, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712908864021301, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734964370727539, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746610522270203, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753915667533875, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9746209383010864, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9774773120880127, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832538366317749, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848795533180237, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899031519889832, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910617470741272, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914121031761169, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943784475326538, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966936707496643, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.7.mlp": [ + { + "accuracy": 0.8725412487983704, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8878173828125, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8953086733818054, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8980252146720886, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9569078087806702, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9597389698028564, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9647153615951538, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772448539733887, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.978602409362793, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9784143567085266, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804797172546387, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9888095855712891, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895743727684021, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9931209087371826, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936420321464539, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944472312927246, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9959279894828796, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.self_attn": [ + { + "accuracy": 0.9429652690887451, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9465553760528564, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9538396596908569, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9613441228866577, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9707050919532776, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.970777153968811, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9783258438110352, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9799899458885193, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980975329875946, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819540977478027, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9844391345977783, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9857968688011169, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877724647521973, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988639771938324, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929889440536499, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933602213859558, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941120147705078, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949002861976624, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9968420267105103, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.8.mlp": [ + { + "accuracy": 0.9102291464805603, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9130614995956421, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9257187843322754, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9298596382141113, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9545649290084839, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.958125114440918, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.963689386844635, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760123491287231, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782727956771851, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768155217170715, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9797094464302063, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881933927536011, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892504215240479, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9930516481399536, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936263561248779, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9945302605628967, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9965332746505737, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.self_attn": [ + { + "accuracy": 0.9282899498939514, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9351011514663696, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9455836415290833, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9576588273048401, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966029942035675, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678690433502197, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9803181290626526, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.981079638004303, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829471707344055, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9838234186172485, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9824768900871277, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9841254949569702, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869195222854614, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9880546927452087, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925569295883179, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933850169181824, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9943628311157227, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9956436157226562, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9976159930229187, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.9.mlp": [ + { + "accuracy": 0.9005219340324402, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9035323262214661, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9191083312034607, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9241676926612854, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9495804309844971, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9535504579544067, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9603325128555298, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9733299016952515, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9759061932563782, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742476940155029, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9775040745735168, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9869392514228821, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881041049957275, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923625588417053, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929435849189758, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940640926361084, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961867332458496, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.self_attn": [ + { + "accuracy": 0.9166128635406494, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9199341535568237, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9402506351470947, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9516711235046387, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9613158702850342, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9613581895828247, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762513637542725, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778940677642822, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801464676856995, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806835055351257, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9806572198867798, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813951253890991, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9851117134094238, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985896110534668, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913346171379089, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925958514213562, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993000864982605, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9951004385948181, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9967713356018066, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.10.mlp": [ + { + "accuracy": 0.8975040912628174, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9005443453788757, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9163171052932739, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9213529229164124, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9480594992637634, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9521247744560242, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9589993357658386, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727143049240112, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752895832061768, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734779000282288, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768207669258118, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9865581393241882, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9877426624298096, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921929836273193, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927415251731873, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938681721687317, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961109161376953, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.self_attn": [ + { + "accuracy": 0.9074253439903259, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9098256230354309, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9236315488815308, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9354451894760132, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9524930119514465, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9559698700904846, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9724481105804443, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9728968739509583, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760230779647827, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770252704620361, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9767003655433655, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9780744910240173, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813607931137085, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832541942596436, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9898026585578918, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913036227226257, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9922217130661011, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941311478614807, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962098598480225, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.11.mlp": [ + { + "accuracy": 0.8892025947570801, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8925290703773499, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9095908999443054, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9150708317756653, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.943681001663208, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9481810331344604, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9556393027305603, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9703974723815918, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732106328010559, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9711923599243164, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974910318851471, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.985415518283844, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9867509007453918, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915050268173218, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9921199679374695, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.993339478969574, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9957578778266907, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.self_attn": [ + { + "accuracy": 0.9093387126922607, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9125458598136902, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9302800893783569, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9454154968261719, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.954777181148529, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9549955725669861, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.974107027053833, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747903347015381, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9762389063835144, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773101210594177, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9770426750183105, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9790635704994202, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825307130813599, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835410118103027, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900920391082764, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990960955619812, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992554783821106, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941905736923218, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9961017370223999, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.12.mlp": [ + { + "accuracy": 0.8799053430557251, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8837117552757263, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9029205441474915, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9091644883155823, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.939030110836029, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9439636468887329, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9522704482078552, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9675298929214478, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.970690131187439, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9687992334365845, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9727943539619446, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.984127938747406, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9854415655136108, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905670881271362, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9913174510002136, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9926691651344299, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9950883984565735, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.self_attn": [ + { + "accuracy": 0.9103389978408813, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9127525091171265, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9248526692390442, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9391911625862122, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9532256722450256, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9552167654037476, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9726668000221252, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9739846587181091, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9760927557945251, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9772098064422607, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.976651132106781, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9778305888175964, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813156127929688, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830231666564941, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989805281162262, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9907415509223938, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9927775263786316, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9938715696334839, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995988130569458, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.13.mlp": [ + { + "accuracy": 0.8769088983535767, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8808553218841553, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9002501368522644, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9065883755683899, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9374593496322632, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9425934553146362, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9509568810462952, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9667003750801086, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9699137210845947, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9679970741271973, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9721119403839111, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9836855530738831, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9850828647613525, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99024897813797, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910765290260315, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992433488368988, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949423670768738, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.self_attn": [ + { + "accuracy": 0.9029273986816406, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9088284373283386, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9244690537452698, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9410009384155273, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503385424613953, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9523110389709473, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9707006812095642, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9712740778923035, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9749066233634949, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9763739705085754, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9756332039833069, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9773731231689453, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810095429420471, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9826784133911133, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895246624946594, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9904550313949585, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992099940776825, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9933830499649048, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9960741996765137, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.14.mlp": [ + { + "accuracy": 0.8759914040565491, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8799580335617065, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8990435004234314, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9056342840194702, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9368672966957092, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9420580267906189, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9503653645515442, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9660590291023254, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694578647613525, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9677258729934692, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718116521835327, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9835555553436279, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9848629832267761, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901185631752014, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910452961921692, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9924203753471375, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9949218034744263, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.self_attn": [ + { + "accuracy": 0.8893100023269653, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8987782001495361, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9152678847312927, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.931481122970581, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9433881640434265, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9469841122627258, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9644014239311218, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.966644823551178, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9715496897697449, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.972545325756073, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9716885685920715, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9752720594406128, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97758549451828, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9804641604423523, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.987440824508667, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894666075706482, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902193546295166, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9929558634757996, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.995780348777771, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.15.mlp": [ + { + "accuracy": 0.8659887909889221, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8704725503921509, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8915677070617676, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.898992657661438, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.931713879108429, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9373441934585571, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9467344284057617, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9631244540214539, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9667316675186157, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649596214294434, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9694186449050903, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9819871783256531, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834125638008118, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9889383316040039, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900503158569336, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915703535079956, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994057297706604, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.self_attn": [ + { + "accuracy": 0.9030227661132812, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9026775360107422, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9248975515365601, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9395556449890137, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9499831199645996, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9537744522094727, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693153500556946, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9713008999824524, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742843508720398, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9757671356201172, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754165410995483, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9768913984298706, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9808060526847839, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825798273086548, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9894065856933594, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9905060529708862, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9920088648796082, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9940291047096252, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9966192841529846, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.16.mlp": [ + { + "accuracy": 0.8581516742706299, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8626471161842346, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8861152529716492, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8943722248077393, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9277361631393433, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9336879849433899, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9439961314201355, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9609542489051819, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9649822115898132, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9629716873168945, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.967682957649231, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9809861779212952, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9825153350830078, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.988379716873169, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9895303249359131, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912123680114746, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.99379962682724, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.self_attn": [ + { + "accuracy": 0.8964958190917969, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8936324119567871, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9206169843673706, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9357521533966064, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9489548802375793, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9497196674346924, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.965671718120575, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9693997502326965, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.972968578338623, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9741232991218567, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9734308123588562, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9761313199996948, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.980276882648468, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817514419555664, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9887178540229797, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900801181793213, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9910966157913208, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9936937093734741, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9963500499725342, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.17.mlp": [ + { + "accuracy": 0.8630141019821167, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8671334981918335, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8894500732421875, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.896891176700592, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9303850531578064, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.935860276222229, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9456917643547058, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9628193378448486, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9665745496749878, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9643836617469788, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688860774040222, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818735718727112, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9834638833999634, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9892539381980896, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901444911956787, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917520880699158, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9944738745689392, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.self_attn": [ + { + "accuracy": 0.9019427299499512, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9078596830368042, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9219541549682617, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9366336464881897, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9488438367843628, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9499874711036682, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9651927351951599, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9676461815834045, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9716928005218506, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9732497334480286, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9747809767723083, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9761047959327698, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9792559742927551, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9810831546783447, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9881407022476196, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9902014136314392, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9903944730758667, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9935200810432434, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996117889881134, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.18.mlp": [ + { + "accuracy": 0.8605747222900391, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8647972941398621, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8866601586341858, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8939719796180725, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9293847680091858, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9347387552261353, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9445092678070068, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9624964594841003, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9660989046096802, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9639678001403809, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9684432744979858, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817145466804504, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983350932598114, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9893238544464111, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901590347290039, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.991756021976471, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9947014451026917, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.self_attn": [ + { + "accuracy": 0.9168154001235962, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9219701886177063, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9349164962768555, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9477123022079468, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.95574951171875, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9583690762519836, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9674597382545471, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9718824028968811, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9753310680389404, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.976052463054657, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9785422682762146, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9800976514816284, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9818910956382751, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9832127690315247, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9896795749664307, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9912778735160828, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914635419845581, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941350817680359, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.996364414691925, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.19.mlp": [ + { + "accuracy": 0.8595742583274841, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8637060523033142, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8856976628303528, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8930126428604126, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9288092851638794, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.934228777885437, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.944034993648529, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9622354507446289, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9659056663513184, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9637033343315125, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9682234525680542, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9816124439239502, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.983294665813446, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.989338219165802, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9901406764984131, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9917548894882202, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.994799792766571, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.self_attn": [ + { + "accuracy": 0.9311497807502747, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.935117781162262, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9443801045417786, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9567604660987854, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9638588428497314, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.96546870470047, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9754751324653625, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9771744608879089, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9791628122329712, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9801713228225708, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9817180037498474, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9829753041267395, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9849694967269897, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9862427711486816, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9914953708648682, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9925433397293091, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9932183027267456, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9953316450119019, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9972721934318542, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.20.mlp": [ + { + "accuracy": 0.8580663204193115, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8623858690261841, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8836959004402161, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8906974792480469, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9279429316520691, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9335986375808716, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9430612325668335, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9617114663124084, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9654552936553955, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9632083177566528, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9678635001182556, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813076853752136, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9830392003059387, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890173077583313, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9899916648864746, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915400743484497, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9946576356887817, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.self_attn": [ + { + "accuracy": 0.9130366444587708, + "total_bits": 20060160, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9170297980308533, + "total_bits": 20715520, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9306029081344604, + "total_bits": 21157120, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9450668096542358, + "total_bits": 25219584, + "q_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9556155800819397, + "total_bits": 29789184, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.956890344619751, + "total_bits": 29806336, + "q_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64, + "3": 64 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9742314219474792, + "total_bits": 38046720, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9751864075660706, + "total_bits": 38063872, + "q_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.97710120677948, + "total_bits": 38379008, + "q_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 64 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9782953858375549, + "total_bits": 38940672, + "q_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9779431819915771, + "total_bits": 39243520, + "q_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9796767830848694, + "total_bits": 39550464, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9831433892250061, + "total_bits": 39983104, + "q_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 64 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 64, + "4": 64 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9846049547195435, + "total_bits": 40439808, + "q_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "5": 32 + }, + "bits": [ + 5 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.990371823310852, + "total_bits": 49122304, + "q_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9915686249732971, + "total_bits": 49876992, + "q_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.992496907711029, + "total_bits": 56921088, + "q_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9941430687904358, + "total_bits": 58863616, + "q_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 32 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "6": 32 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9962378740310669, + "total_bits": 75795456, + "q_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "k_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "v_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "o_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.layers.21.mlp": [ + { + "accuracy": 0.8655729293823242, + "total_bits": 77577584, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8697267770767212, + "total_bits": 80461168, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "6": 32, + "3": 64, + "2": 64 + }, + "bits": [ + 6, + 3, + 2 + ], + "bits_prop": [ + 0.05, + 0.2, + 0.75 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.886796236038208, + "total_bits": 89861376, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "3": 32 + }, + "bits": [ + 5, + 3 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.8923400044441223, + "total_bits": 100805888, + "gate_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "3": 64, + "2": 64 + }, + "bits": [ + 3, + 2 + ], + "bits_prop": [ + 0.3, + 0.7 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9315621256828308, + "total_bits": 113536912, + "gate_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 128, + "3": 128 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128, + "3": 128 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9373039603233337, + "total_bits": 116422912, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32, + "3": 32 + }, + "bits": [ + 8, + 4, + 3 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9446560144424438, + "total_bits": 125157776, + "gate_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32, + "3": 32 + }, + "bits": [ + 4, + 3 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9627256393432617, + "total_bits": 142997648, + "gate_proj": { + "group_size": { + "4": 128 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 128 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9660356044769287, + "total_bits": 145111296, + "gate_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "4": 32 + }, + "bits": [ + 4 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "4": 32 + }, + "bits": [ + 8, + 4 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9644781947135925, + "total_bits": 147550096, + "gate_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 128, + "4": 128 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 128, + "4": 128 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9688010811805725, + "total_bits": 150436096, + "gate_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "5": 32, + "4": 32 + }, + "bits": [ + 5, + 4 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "5": 32, + "4": 32 + }, + "bits": [ + 8, + 5, + 4 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9813495874404907, + "total_bits": 181563280, + "gate_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128, + "5": 128 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128, + "5": 128 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9820441007614136, + "total_bits": 184449280, + "gate_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 32, + "5": 32 + }, + "bits": [ + 6, + 5 + ], + "bits_prop": [ + 0.25, + 0.75 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 32, + "5": 32 + }, + "bits": [ + 8, + 6, + 5 + ], + "bits_prop": [ + 0.05, + 0.1, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9878106713294983, + "total_bits": 209940368, + "gate_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "6": 128 + }, + "bits": [ + 6 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 32, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.05, + 0.95 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9890310168266296, + "total_bits": 218140224, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.15, + 0.85 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9900964498519897, + "total_bits": 237538880, + "gate_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128, + "6": 128 + }, + "bits": [ + 8, + 6 + ], + "bits_prop": [ + 0.1, + 0.9 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + }, + { + "accuracy": 0.9923749566078186, + "total_bits": 277909056, + "gate_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "up_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + }, + "down_proj": { + "group_size": { + "8": 128 + }, + "bits": [ + 8 + ], + "bits_prop": [ + 1 + ], + "scale_bits": 4 + } + } + ], + "model.norm.norm": null, + "lm_head.linear": null + }, + "last_module_idx": 46 } \ No newline at end of file