| { |
| "measurement": { |
| "model.layers.0": { |
| "accuracy": 0.87518310546875, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.1": { |
| "accuracy": 0.95147705078125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.2": { |
| "accuracy": 0.980438232421875, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.3": { |
| "accuracy": 0.97711181640625, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.4": { |
| "accuracy": 0.9727783203125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.5": { |
| "accuracy": 0.964508056640625, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.6": { |
| "accuracy": 0.957672119140625, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.7": { |
| "accuracy": 0.962005615234375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.8": { |
| "accuracy": 0.96429443359375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.9": { |
| "accuracy": 0.958831787109375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.10": { |
| "accuracy": 0.96038818359375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.11": { |
| "accuracy": 0.959991455078125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.12": { |
| "accuracy": 0.957183837890625, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.13": { |
| "accuracy": 0.954437255859375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.14": { |
| "accuracy": 0.959442138671875, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.15": { |
| "accuracy": 0.94647216796875, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.16": { |
| "accuracy": 0.9613037109375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.17": { |
| "accuracy": 0.9733734130859375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.18": { |
| "accuracy": 0.9786529541015625, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.19": { |
| "accuracy": 0.97515869140625, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.20": { |
| "accuracy": 0.9787445068359375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.21": { |
| "accuracy": 0.971771240234375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.22": { |
| "accuracy": 0.979736328125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.23": { |
| "accuracy": 0.983734130859375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.24": { |
| "accuracy": 0.977630615234375, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.25": { |
| "accuracy": 0.9701690673828125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.26": { |
| "accuracy": 0.9687042236328125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.27": { |
| "accuracy": 0.970367431640625, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.28": { |
| "accuracy": 0.9718780517578125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.29": { |
| "accuracy": 0.96551513671875, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.30": { |
| "accuracy": 0.959747314453125, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| }, |
| "model.layers.31": { |
| "accuracy": 0.9403076171875, |
| "total_bits": 889454592.0, |
| "o_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "down_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "q_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "k_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "v_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "gate_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| }, |
| "up_proj": { |
| "group_size": { |
| "4": 128 |
| }, |
| "bits": [ |
| 4 |
| ], |
| "bits_prop": [ |
| 1 |
| ], |
| "scale_bits": 4, |
| "scale_groups:": 32 |
| } |
| } |
| } |
| } |