| { | |
| "measurement": { | |
| "model.layers.0": { | |
| "accuracy": 0.9122365315755208, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.1": { | |
| "accuracy": 0.9080530007680258, | |
| "total_bits": 638304288, | |
| "q_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.2": { | |
| "accuracy": 0.9275489846865336, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.3": { | |
| "accuracy": 0.8821520010630289, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.4": { | |
| "accuracy": 0.8701377709706625, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.5": { | |
| "accuracy": 0.857724110285441, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.6": { | |
| "accuracy": 0.8618481953938801, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.7": { | |
| "accuracy": 0.8569034735361736, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.8": { | |
| "accuracy": 0.8387942314147949, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.9": { | |
| "accuracy": 0.8487000465393066, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.10": { | |
| "accuracy": 0.8522807757059734, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.11": { | |
| "accuracy": 0.8486220041910807, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.12": { | |
| "accuracy": 0.8406097094217937, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.13": { | |
| "accuracy": 0.8556122779846191, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.14": { | |
| "accuracy": 0.8522577285766602, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.15": { | |
| "accuracy": 0.8361595471700033, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.16": { | |
| "accuracy": 0.8548601468404134, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.17": { | |
| "accuracy": 0.8503810564676921, | |
| "total_bits": 483482688, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.18": { | |
| "accuracy": 0.8818686803181967, | |
| "total_bits": 539218464, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.19": { | |
| "accuracy": 0.8884480794270834, | |
| "total_bits": 539218464, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.20": { | |
| "accuracy": 0.8379961649576823, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.21": { | |
| "accuracy": 0.8830145200093588, | |
| "total_bits": 539218464, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.22": { | |
| "accuracy": 0.8894084294637045, | |
| "total_bits": 572247072, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.23": { | |
| "accuracy": 0.870563824971517, | |
| "total_bits": 539218464, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.24": { | |
| "accuracy": 0.8652655283610026, | |
| "total_bits": 539218464, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.25": { | |
| "accuracy": 0.8744535446166992, | |
| "total_bits": 572247072, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.26": { | |
| "accuracy": 0.8678053220113119, | |
| "total_bits": 572247072, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.27": { | |
| "accuracy": 0.8675735791524252, | |
| "total_bits": 572247072, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.28": { | |
| "accuracy": 0.860187848409017, | |
| "total_bits": 539218464, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.29": { | |
| "accuracy": 0.8641198476155599, | |
| "total_bits": 539218464, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.30": { | |
| "accuracy": 0.9148619969685872, | |
| "total_bits": 661014048, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| }, | |
| "model.layers.31": { | |
| "accuracy": 0.8672019640604655, | |
| "total_bits": 450454080, | |
| "q_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "k_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "v_proj": { | |
| "group_size": { | |
| "4": 128 | |
| }, | |
| "bits": [ | |
| 4 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "o_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "up_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "gate_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| }, | |
| "down_proj": { | |
| "group_size": { | |
| "2": 64 | |
| }, | |
| "bits": [ | |
| 2 | |
| ], | |
| "bits_prop": [ | |
| 1 | |
| ], | |
| "scale_bits": 4 | |
| } | |
| } | |
| } | |
| } |