# Source: Hugging Face model repo upload by pshashid
# Commit: "Upload NVFP4 quantized model with FP8 KV-cache" (0f97414, verified)
---
# Quantization recipe (llm-compressor / compressed-tensors format).
# Applies NVFP4 quantization to all Linear layers (except the LM head)
# and an FP8 (8-bit float) scheme to the KV cache, matching the commit
# message "NVFP4 quantized model with FP8 KV-cache".
#
# NOTE(review): the scraped source had all indentation stripped, which
# flattens every key to the top level; the nesting below is restored to
# the canonical llm-compressor recipe layout.
default_stage:
  default_modifiers:
    QuantizationModifier:
      # Quantize every Linear module...
      targets: [Linear]
      # ...except the output head, which is left unquantized.
      ignore: [lm_head]
      # NVFP4: NVIDIA 4-bit floating-point weight/activation scheme.
      scheme: NVFP4
      # FP8 KV-cache: 8-bit float, one symmetric scale per tensor.
      kv_cache_scheme:
        num_bits: 8
        type: float
        symmetric: true
        group_size: null         # no grouping — per-tensor strategy below
        strategy: tensor         # single scale for the whole tensor
        block_structure: null
        dynamic: false           # static (calibrated) scales, not computed per batch
        actorder: null
        scale_dtype: null        # dtype defaults left to the consumer
        zp_dtype: null
        observer: memoryless_minmax
        observer_kwargs: {}
        # NOTE(review): assumed to belong to the KV-cache QuantizationArgs
        # (it followed the other args in the flattened source) — confirm
        # against the original recipe.yaml in the repo.
        bypass_divisibility_checks: false