| { | |
| "input_dir": "/root/vllmbench/checkpoints/BitNet-2B-BF16", | |
| "output_dir": "/root/vllmbench/checkpoints_slidesparse/BitNet-2B-BF16-SlideSparse-2_2", | |
| "config": { | |
| "Z": 2, | |
| "L": 2, | |
| "N": 1, | |
| "window_size": 4, | |
| "stride": 2, | |
| "num_windows": 0, | |
| "expand_ratio": 0.0, | |
| "in_group_size": 2, | |
| "out_group_size": 0 | |
| }, | |
| "mode": "magnitude", | |
| "skip_prune": false, | |
| "skip_slide": true, | |
| "skip_compress": true, | |
| "use_real_cusparselt": true, | |
| "files": [ | |
| { | |
| "file": "model.safetensors", | |
| "layers": [ | |
| { | |
| "key": "model.layers.0.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15199788411458334 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.0.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17113432707609955 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.0.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16855446144386574 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.0.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.256956787109375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.0.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2794976806640625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.0.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.30107269287109373 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.0.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1873193359375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.1.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.3483530680338542 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.1.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.48345709906684026 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.1.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.48394843207465277 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.1.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.189381103515625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.1.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.22034423828125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.1.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2115972900390625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.1.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.181151123046875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.10.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17935282389322918 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.10.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18504457826967594 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.10.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1506580494068287 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.10.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.3022802734375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.10.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1412506103515625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.10.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.278251953125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.10.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.176573486328125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.11.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.14923502604166666 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.11.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.176190185546875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.11.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16084493001302083 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.11.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.242635498046875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.11.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.128812255859375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.11.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2742071533203125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.11.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.160145263671875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.12.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1784042923538773 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.12.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18707829228153935 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.12.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.167960046838831 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.12.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.28708740234375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.12.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17186126708984376 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.12.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2387982177734375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.12.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16762939453125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.13.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17230473271122684 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.13.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18391441062644676 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.13.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.14585254810474538 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.13.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.228272705078125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.13.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1856365966796875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.13.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.21499114990234375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.13.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.172703857421875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.14.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19432203504774306 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.14.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19343894675925927 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.14.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16939380787037037 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.14.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.3281884765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.14.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16054779052734375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.14.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.277701416015625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.14.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.11505859375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.15.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1797210693359375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.15.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19692936650028936 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.15.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17652644404658566 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.15.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15416015625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.15.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.12657806396484375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.15.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2110638427734375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.15.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.091549072265625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.16.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18986194751880786 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.16.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15843031141493055 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.16.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17178627296730323 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.16.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.218714599609375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.16.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18352264404296875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.16.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.23088409423828124 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.16.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1065673828125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.17.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.14451587818287037 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.17.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19717282895688656 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.17.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16488783094618056 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.17.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.31508544921875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.17.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1187261962890625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.17.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2463275146484375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.17.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20371337890625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.18.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16681428132233797 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.18.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1786736382378472 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.18.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15566112377025462 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.18.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.213857421875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.18.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20393035888671876 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.18.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2262286376953125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.18.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.09686279296875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.19.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1752655029296875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.19.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.166396642614294 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.19.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.204178308557581 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.19.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17760009765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.19.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.12464263916015625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.19.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19770599365234376 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.19.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.122510986328125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.2.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2550498679832176 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.2.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.3559205231843171 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.2.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.3575468840422454 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.2.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15003173828125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.2.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20570770263671875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.2.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18801513671875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.2.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.150972900390625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.20.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18525673195167824 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.20.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1829010009765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.20.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19712739167390048 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.20.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.319686279296875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.20.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18260040283203124 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.20.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2580020141601562 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.20.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.154796142578125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.21.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.14428337944878472 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.21.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17313514992042825 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.21.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15726759168836807 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.21.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.219298095703125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.21.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20331451416015625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.21.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.25748626708984373 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.21.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.155692138671875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.22.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17556468822337962 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.22.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18298170301649305 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.22.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15903331615306712 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.22.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.229600830078125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.22.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20516082763671875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.22.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.22682281494140624 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.22.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1089990234375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.23.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16933186848958334 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.23.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15015417028356481 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.23.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15995099103009258 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.23.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2611669921875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.23.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15424041748046874 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.23.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.25669921875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.23.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.150548095703125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.24.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15282999674479167 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.24.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17266212745949075 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.24.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17506759078414352 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.24.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.270911865234375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.24.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17739471435546875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.24.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.26747161865234376 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.24.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.176749267578125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.25.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16077451352719907 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.25.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15167134602864582 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.25.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16715630425347222 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.25.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.177279052734375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.25.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18887603759765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.25.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2086029052734375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.25.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.12677978515625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.26.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.13425281665943287 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.26.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15587825068721065 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.26.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18711694788049768 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.26.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.188653564453125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.26.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1992559814453125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.26.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.21903289794921876 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.26.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.137728271484375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.27.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1363655372902199 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.27.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2003430401837384 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.27.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19456154152199073 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.27.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.22201171875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.27.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1796588134765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.27.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2278948974609375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.27.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1123095703125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.28.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16350075050636573 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.28.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1676529495804398 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.28.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1866291187427662 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.28.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.183768310546875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.28.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19795166015625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.28.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18107330322265625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.28.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15234619140625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.29.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16791845251012733 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.29.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20634200484664353 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.29.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17300256799768518 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.29.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16551025390625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.29.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.3195074462890625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.29.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1834649658203125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.29.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.247176513671875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.3.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.21333380805121527 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.3.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2500740333839699 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.3.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2590979682074653 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.3.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.173878173828125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.3.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18900421142578125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.3.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20758697509765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.3.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.152174072265625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.4.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.17481248643663194 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.4.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2107018364800347 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.4.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2166152388961227 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.4.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.198648681640625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.4.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1776983642578125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.4.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.21193206787109375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.4.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.142794189453125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.5.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18577213993778935 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.5.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1812286376953125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.5.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18417573151765046 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.5.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.234036865234375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.5.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.20351898193359375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.5.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2208203125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.5.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15800537109375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.6.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1422286422164352 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.6.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18378974066840278 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.6.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.18515082465277777 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.6.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.19806640625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.6.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.155452880859375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.6.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.25316802978515623 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.6.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.12183837890625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.7.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.14323628743489583 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.7.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16950107150607638 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.7.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1680545383029514 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.7.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.202620849609375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.7.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16763763427734374 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.7.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.22835784912109375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.7.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.130732421875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.8.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1428402370876736 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.8.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.16652662489149306 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.8.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1641455756293403 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.8.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.23819091796875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.8.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1687322998046875 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.8.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.25700439453125 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.8.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.12066162109375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.9.mlp.down_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 6912 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1411677607783565 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.9.mlp.gate_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1797061496310764 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.9.mlp.up_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 6912, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.15515894006799769 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.9.self_attn.k_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.2318359375 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.9.self_attn.o_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1947369384765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.9.self_attn.q_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 2560, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.1998541259765625 | |
| } | |
| ] | |
| } | |
| }, | |
| { | |
| "key": "model.layers.9.self_attn.v_proj.weight", | |
| "result": { | |
| "original_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "final_shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "stages": [ | |
| { | |
| "name": "quant_prune", | |
| "shape": [ | |
| 640, | |
| 2560 | |
| ], | |
| "output_dtype": "int8", | |
| "ZL_valid": false, | |
| "ZL_valid_ratio": 0.124481201171875 | |
| } | |
| ] | |
| } | |
| } | |
| ], | |
| "skipped": [ | |
| "model.embed_tokens.weight", | |
| "model.layers.0.input_layernorm.weight", | |
| "model.layers.0.mlp.ffn_sub_norm.weight", | |
| "model.layers.0.post_attention_layernorm.weight", | |
| "model.layers.0.self_attn.attn_sub_norm.weight", | |
| "model.layers.1.input_layernorm.weight", | |
| "model.layers.1.mlp.ffn_sub_norm.weight", | |
| "model.layers.1.post_attention_layernorm.weight", | |
| "model.layers.1.self_attn.attn_sub_norm.weight", | |
| "model.layers.10.input_layernorm.weight", | |
| "model.layers.10.mlp.ffn_sub_norm.weight", | |
| "model.layers.10.post_attention_layernorm.weight", | |
| "model.layers.10.self_attn.attn_sub_norm.weight", | |
| "model.layers.11.input_layernorm.weight", | |
| "model.layers.11.mlp.ffn_sub_norm.weight", | |
| "model.layers.11.post_attention_layernorm.weight", | |
| "model.layers.11.self_attn.attn_sub_norm.weight", | |
| "model.layers.12.input_layernorm.weight", | |
| "model.layers.12.mlp.ffn_sub_norm.weight", | |
| "model.layers.12.post_attention_layernorm.weight", | |
| "model.layers.12.self_attn.attn_sub_norm.weight", | |
| "model.layers.13.input_layernorm.weight", | |
| "model.layers.13.mlp.ffn_sub_norm.weight", | |
| "model.layers.13.post_attention_layernorm.weight", | |
| "model.layers.13.self_attn.attn_sub_norm.weight", | |
| "model.layers.14.input_layernorm.weight", | |
| "model.layers.14.mlp.ffn_sub_norm.weight", | |
| "model.layers.14.post_attention_layernorm.weight", | |
| "model.layers.14.self_attn.attn_sub_norm.weight", | |
| "model.layers.15.input_layernorm.weight", | |
| "model.layers.15.mlp.ffn_sub_norm.weight", | |
| "model.layers.15.post_attention_layernorm.weight", | |
| "model.layers.15.self_attn.attn_sub_norm.weight", | |
| "model.layers.16.input_layernorm.weight", | |
| "model.layers.16.mlp.ffn_sub_norm.weight", | |
| "model.layers.16.post_attention_layernorm.weight", | |
| "model.layers.16.self_attn.attn_sub_norm.weight", | |
| "model.layers.17.input_layernorm.weight", | |
| "model.layers.17.mlp.ffn_sub_norm.weight", | |
| "model.layers.17.post_attention_layernorm.weight", | |
| "model.layers.17.self_attn.attn_sub_norm.weight", | |
| "model.layers.18.input_layernorm.weight", | |
| "model.layers.18.mlp.ffn_sub_norm.weight", | |
| "model.layers.18.post_attention_layernorm.weight", | |
| "model.layers.18.self_attn.attn_sub_norm.weight", | |
| "model.layers.19.input_layernorm.weight", | |
| "model.layers.19.mlp.ffn_sub_norm.weight", | |
| "model.layers.19.post_attention_layernorm.weight", | |
| "model.layers.19.self_attn.attn_sub_norm.weight", | |
| "model.layers.2.input_layernorm.weight", | |
| "model.layers.2.mlp.ffn_sub_norm.weight", | |
| "model.layers.2.post_attention_layernorm.weight", | |
| "model.layers.2.self_attn.attn_sub_norm.weight", | |
| "model.layers.20.input_layernorm.weight", | |
| "model.layers.20.mlp.ffn_sub_norm.weight", | |
| "model.layers.20.post_attention_layernorm.weight", | |
| "model.layers.20.self_attn.attn_sub_norm.weight", | |
| "model.layers.21.input_layernorm.weight", | |
| "model.layers.21.mlp.ffn_sub_norm.weight", | |
| "model.layers.21.post_attention_layernorm.weight", | |
| "model.layers.21.self_attn.attn_sub_norm.weight", | |
| "model.layers.22.input_layernorm.weight", | |
| "model.layers.22.mlp.ffn_sub_norm.weight", | |
| "model.layers.22.post_attention_layernorm.weight", | |
| "model.layers.22.self_attn.attn_sub_norm.weight", | |
| "model.layers.23.input_layernorm.weight", | |
| "model.layers.23.mlp.ffn_sub_norm.weight", | |
| "model.layers.23.post_attention_layernorm.weight", | |
| "model.layers.23.self_attn.attn_sub_norm.weight", | |
| "model.layers.24.input_layernorm.weight", | |
| "model.layers.24.mlp.ffn_sub_norm.weight", | |
| "model.layers.24.post_attention_layernorm.weight", | |
| "model.layers.24.self_attn.attn_sub_norm.weight", | |
| "model.layers.25.input_layernorm.weight", | |
| "model.layers.25.mlp.ffn_sub_norm.weight", | |
| "model.layers.25.post_attention_layernorm.weight", | |
| "model.layers.25.self_attn.attn_sub_norm.weight", | |
| "model.layers.26.input_layernorm.weight", | |
| "model.layers.26.mlp.ffn_sub_norm.weight", | |
| "model.layers.26.post_attention_layernorm.weight", | |
| "model.layers.26.self_attn.attn_sub_norm.weight", | |
| "model.layers.27.input_layernorm.weight", | |
| "model.layers.27.mlp.ffn_sub_norm.weight", | |
| "model.layers.27.post_attention_layernorm.weight", | |
| "model.layers.27.self_attn.attn_sub_norm.weight", | |
| "model.layers.28.input_layernorm.weight", | |
| "model.layers.28.mlp.ffn_sub_norm.weight", | |
| "model.layers.28.post_attention_layernorm.weight", | |
| "model.layers.28.self_attn.attn_sub_norm.weight", | |
| "model.layers.29.input_layernorm.weight", | |
| "model.layers.29.mlp.ffn_sub_norm.weight", | |
| "model.layers.29.post_attention_layernorm.weight", | |
| "model.layers.29.self_attn.attn_sub_norm.weight", | |
| "model.layers.3.input_layernorm.weight", | |
| "model.layers.3.mlp.ffn_sub_norm.weight", | |
| "model.layers.3.post_attention_layernorm.weight", | |
| "model.layers.3.self_attn.attn_sub_norm.weight", | |
| "model.layers.4.input_layernorm.weight", | |
| "model.layers.4.mlp.ffn_sub_norm.weight", | |
| "model.layers.4.post_attention_layernorm.weight", | |
| "model.layers.4.self_attn.attn_sub_norm.weight", | |
| "model.layers.5.input_layernorm.weight", | |
| "model.layers.5.mlp.ffn_sub_norm.weight", | |
| "model.layers.5.post_attention_layernorm.weight", | |
| "model.layers.5.self_attn.attn_sub_norm.weight", | |
| "model.layers.6.input_layernorm.weight", | |
| "model.layers.6.mlp.ffn_sub_norm.weight", | |
| "model.layers.6.post_attention_layernorm.weight", | |
| "model.layers.6.self_attn.attn_sub_norm.weight", | |
| "model.layers.7.input_layernorm.weight", | |
| "model.layers.7.mlp.ffn_sub_norm.weight", | |
| "model.layers.7.post_attention_layernorm.weight", | |
| "model.layers.7.self_attn.attn_sub_norm.weight", | |
| "model.layers.8.input_layernorm.weight", | |
| "model.layers.8.mlp.ffn_sub_norm.weight", | |
| "model.layers.8.post_attention_layernorm.weight", | |
| "model.layers.8.self_attn.attn_sub_norm.weight", | |
| "model.layers.9.input_layernorm.weight", | |
| "model.layers.9.mlp.ffn_sub_norm.weight", | |
| "model.layers.9.post_attention_layernorm.weight", | |
| "model.layers.9.self_attn.attn_sub_norm.weight", | |
| "model.norm.weight" | |
| ] | |
| } | |
| ], | |
| "total_layers_processed": 210, | |
| "total_layers_skipped": 122, | |
| "elapsed_time": 8.408682823181152 | |
| } |