File size: 422 Bytes
cdff52b |
1 2 3 4 5 6 7 8 9 10 |
{
"quantization_method": "bitsandbytes_int8",
"load_in_8bit": true,
"llm_int8_threshold": 6.0,
"expected_vram_gb": 95,
"expected_total_memory_gb": 100,
"notes": "Load with BitsAndBytesConfig for INT8 quantization. VAE and attention layers kept in full precision.",
"attention_layers_quantized": false,
"quality_vs_nf4": "Significantly better - approximately 2x memory for ~98% quality retention"
}