Qwen3-VL-8B-Instruct-NVFP4 / quantization_config.json

Upload folder using huggingface_hub

112f532 verified 2 months ago

373 Bytes

	{
	"bits": 4,
	"group_size": 16,
	"sym": true,
	"data_type": "nv_fp",
	"act_bits": 4,
	"act_group_size": 16,
	"act_sym": true,
	"act_dynamic": true,
	"act_data_type": "nv_fp4_with_static_gs",
	"autoround_version": "0.8.0",
	"block_name_to_quantize": "model.language_model.layers",
	"quant_method": "auto-round",
	"packing_format": "auto_round:llm_compressor"
	}