{
  "model_type": "llama",
  "quantization_config": {
    "load_in_4bit": true,
    "bnb_4bit_compute_dtype": "float16"
  }
}