{
  "quantization_method": "bitsandbytes_nf4",
  "load_in_4bit": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "bnb_4bit_compute_dtype": "torch.bfloat16",
  "expected_vram_gb": 45,
  "notes": "Load with BitsAndBytesConfig for NF4 quantization. Attention layers kept in full precision.",
  "attention_layers_quantized": false
}