{
  "model_type": "llama",
  "quantization_config": {
    "load_in_4bit": true,
    "bnb_4bit_compute_dtype": "float16"
  }
}