{ "quantization_method": "bitsandbytes_int8", "load_in_8bit": true, "llm_int8_threshold": 6.0, "expected_vram_gb": 95, "expected_total_memory_gb": 100, "notes": "Load with BitsAndBytesConfig for INT8 quantization. VAE and attention layers kept in full precision.", "attention_layers_quantized": false, "quality_vs_nf4": "Significantly better than NF4: uses roughly 2x the memory of NF4 for ~98% quality retention" }