File size: 422 Bytes
cdff52b
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
{
  "quantization_method": "bitsandbytes_int8",
  "load_in_8bit": true,
  "llm_int8_threshold": 6.0,
  "expected_vram_gb": 95,
  "expected_total_memory_gb": 100,
  "notes": "Load with BitsAndBytesConfig for INT8 quantization. VAE and attention layers are kept in full precision.",
  "attention_layers_quantized": false,
  "quality_vs_nf4": "Significantly better quality than NF4 - approximately 2x the memory of NF4 for ~98% quality retention"
}