{ "architectures": ["GPTQForCausalLM"], "model_type": "gptq", "quantization_config": { "bits": 4, "group_size": 128, "desc_act": false }, "torch_dtype": "float16", "trust_remote_code": true }