{
  "bits": 4,
  "group_size": 128,
  "desc_act": false,
  "lm_head": false,
  "quant_method": "gptq",
  "checkpoint_format": "gptq",
  "pack_dtype": "int32",
  "meta": {
    "quantizer": ["gptqmodel:5.7.0"],
    "uri": "https://github.com/modelcloud/gptqmodel",
    "damp_percent": 0.05,
    "damp_auto_increment": 0.01,
    "static_groups": false,
    "true_sequential": true,
    "mse": 0.0,
    "gptaq": null,
    "act_group_aware": true,
    "failsafe": {
      "strategy": "rtn",
      "threshold": "0.5%",
      "smooth": {
        "type": "mad",
        "group_size_threshold": 128,
        "k": 2.75
      }
    },
    "offload_to_disk": true,
    "offload_to_disk_path": "./gptqmodel_offload/hqdpgrum-rkaakpxx/",
    "pack_impl": "cpu",
    "mock_quantization": false,
    "gc_mode": "interval",
    "wait_for_submodule_finalizers": false,
    "auto_forward_data_parallel": true,
    "hessian": {
      "chunk_size": null,
      "chunk_bytes": null,
      "staging_dtype": "float32"
    },
    "vram_strategy": "exclusive"
  },
  "sym": true,
  "format": "gptq"
}