HunyuanImage-3.0-Instruct-INT8 / quantization_metadata.json

Upload folder using huggingface_hub

0751b6e verified 5 days ago

666 Bytes

	{
	"model_type": "HunyuanImage-3.0-Instruct",
	"quantization_method": "bitsandbytes_int8",
	"load_in_8bit": true,
	"llm_int8_threshold": 6.0,
	"expected_vram_gb": 95,
	"expected_total_memory_gb": 100,
	"modules_kept_bf16": [
	"vae",
	"vision_model",
	"vision_aligner",
	"patch_embed",
	"final_layer",
	"time_embed",
	"time_embed_2",
	"timestep_emb",
	"attention_projections"
	],
	"notes": "Instruct model with vision encoder kept at BF16 for image understanding quality.",
	"attention_layers_quantized": false,
	"quality_vs_nf4": "Significantly better - approximately 2x memory for ~98% quality retention"
	}