{
  "bits": 4,
  "data_type": "int",
  "group_size": 128,
  "sym": true,
  "low_gpu_mem_usage": true,
  "autoround_version": "0.13.1",
  "block_name_to_quantize": "model.layers",
  "quant_method": "auto-round",
  "packing_format": "auto_round:auto_gptq",
  "extra_config": {
    ".*model\\.layers\\.([1-9]|[1-3][0-9]|4[0-8])\\.mlp\\.gate.*": {
      "bits": 16,
      "data_type": "float"
    }
  }
}