QuantTaskForce
/

openPangu-R-72B-2512-mxfp4

Model card Files Files and versions

openPangu-R-72B-2512-mxfp4 / openpangu_best_practice.yaml

QuantTaskForce's picture

Upload MXFP4 quantized OpenPangu model

62a3a8a verified 9 days ago

history blame contribute delete

1.26 kB

	apiversion: modelslim_v1
	spec:
	process:
	- type: flex_smooth_quant
	proxy_quant_dtype: mxfp4
	proxy_quant_axes: -1
	enable_subgraph_type:
	- up-down
	include:
	- '*'
	exclude:
	- model.layers.50*
	- model.layers.1.*
	- model.layers.2.*
	- model.layers.3.*
	- model.layers.0.*
	- 'mlp.shared_experts'
	- type: linear_quant
	qconfig:
	act:
	scope: per_block
	dtype: mxfp4
	symmetric: true
	method: minmax
	weight:
	scope: per_block
	dtype: mxfp4
	symmetric: true
	method: minmax
	include:
	- '*'
	exclude:
	- model.layers.1.*
	- model.layers.2.*
	- model.layers.3.*
	- model.layers.0.*
	- 'attn'
	- 'mlp.shared_experts'
	- '*mlp.gate'
	- model.layers.50*
	dataset: mix_calib.jsonl
	save:
	- type: ascendv1_saver
	part_file_size: 4
	metadata:
	config_id: openpangu_mxfp4_e2m1
	score: 50.0
	label:
	w_bit: 4
	a_bit: 4
	is_sparse: false
	kv_cache: false
	verified_model_types: []
	verified_tags: {}
	default_mxfp4_e2m1:
	act:
	scope: per_block
	dtype: mxfp4
	symmetric: true
	method: minmax
	weight:
	scope: per_block
	dtype: mxfp4
	symmetric: true
	method: minmax