{ "architectures": ["GPTQForCausalLM"], "model_type": "gptq", "quantization_config": { "bits": 4, "group_size": 128, "desc_act": false }, "torch_dtype": "float16", "trust_remote_code": true }