nm-testing
/

nvfp4_fp8_mixed-e2e

compressed-tensors

Model card Files Files and versions

nvfp4_fp8_mixed-e2e / recipe.yaml

nm-autobot's picture

Upload folder using huggingface_hub

e566e8a verified 19 days ago

History Blame Contribute Delete

1.94 kB

	quant_stage:
	quant_modifiers:
	QuantizationModifier:
	config_groups:
	group_0:
	targets: ['re:.*\.down_proj$']
	weights:
	num_bits: 8
	type: float
	symmetric: true
	group_size: null
	strategy: channel
	block_structure: null
	dynamic: false
	actorder: null
	scale_dtype: null
	zp_dtype: null
	observer: memoryless_minmax
	observer_kwargs: {}
	input_activations:
	num_bits: 8
	type: float
	symmetric: true
	group_size: null
	strategy: token
	block_structure: null
	dynamic: true
	actorder: null
	scale_dtype: null
	zp_dtype: null
	observer: null
	observer_kwargs: {}
	output_activations: null
	format: null
	group_1:
	targets: ['re:.self_attn\.(k\|o\|q\|v)_proj$', 're:.\.(gate\|up)_proj$']
	weights:
	num_bits: 4
	type: float
	symmetric: true
	group_size: 16
	strategy: tensor_group
	block_structure: null
	dynamic: false
	actorder: null
	scale_dtype: torch.float8_e4m3fn
	zp_dtype: null
	observer: memoryless_minmax
	observer_kwargs: {}
	input_activations:
	num_bits: 4
	type: float
	symmetric: true
	group_size: 16
	strategy: tensor_group
	block_structure: null
	dynamic: local
	actorder: null
	scale_dtype: null
	zp_dtype: null
	observer: minmax
	observer_kwargs: {}
	output_activations: null
	format: null
	targets: [Linear]
	ignore: [lm_head]
	bypass_divisibility_checks: false