openPangu-R-72B-2512-mxfp4 / openpangu_best_practice.yaml
QuantTaskForce's picture
Upload MXFP4 quantized OpenPangu model
62a3a8a verified
# Quantization recipe in the `modelslim_v1` config format.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped; confirm it against the modelslim_v1 schema
# before relying on it.
apiversion: modelslim_v1
spec:
  # Processing steps, kept in the order they were listed.
  process:
    # Step 1: `flex_smooth_quant` with an MXFP4 proxy dtype, restricted to
    # the `up-down` subgraph type.
    - type: flex_smooth_quant
      proxy_quant_dtype: mxfp4
      # presumably -1 selects the last axis; confirm against the tool docs
      proxy_quant_axes: -1
      enable_subgraph_type:
        - up-down
      # Select everything, then carve out the exclusions below.
      include:
        - '*'
      # Skips layers 0-3, names matching `model.layers.50*`, and the shared
      # experts.
      # NOTE(review): `model.layers.50*` has no dot before `*`, unlike the
      # layer 0-3 patterns; confirm that the broader match is intended.
      exclude:
        - model.layers.50*
        - model.layers.1.*
        - model.layers.2.*
        - model.layers.3.*
        - model.layers.0.*
        - '*mlp.shared_experts*'
    # Step 2: `linear_quant` with MXFP4 settings for both activations and
    # weights (per_block scope, symmetric, minmax).
    - type: linear_quant
      qconfig:
        act:
          scope: per_block
          dtype: mxfp4
          symmetric: true
          method: minmax
        weight:
          scope: per_block
          dtype: mxfp4
          symmetric: true
          method: minmax
      include:
        - '*'
      # Same exclusions as step 1, plus every `*attn*` name and `*mlp.gate`.
      exclude:
        - model.layers.1.*
        - model.layers.2.*
        - model.layers.3.*
        - model.layers.0.*
        - '*attn*'
        - '*mlp.shared_experts*'
        - '*mlp.gate'
        - model.layers.50*
  # NOTE(review): `dataset` and `save` are placed under `spec` based on key
  # order; confirm against the schema.
  dataset: mix_calib.jsonl
  save:
    - type: ascendv1_saver
      part_file_size: 4
# Descriptive metadata for this recipe.
# NOTE(review): nesting reconstructed from a copy whose indentation had been
# stripped; confirm against the modelslim_v1 schema, in particular that
# `verified_model_types` and `verified_tags` sit directly under `metadata`.
metadata:
  config_id: openpangu_mxfp4_e2m1
  score: 50.0
  # Labels for this recipe: 4-bit weights and activations, with
  # `is_sparse` and `kv_cache` both false.
  label:
    w_bit: 4
    a_bit: 4
    is_sparse: false
    kv_cache: false
  # Both left explicitly empty.
  verified_model_types: []
  verified_tags: {}
# Named MXFP4 quantization settings for activations and weights.
# The values are identical to the `qconfig` of the `linear_quant` step in
# this file.
# NOTE(review): nesting reconstructed from a copy whose indentation had been
# stripped; confirm against the modelslim_v1 schema.
default_mxfp4_e2m1:
  act:
    scope: per_block
    dtype: mxfp4
    symmetric: true
    method: minmax
  weight:
    scope: per_block
    dtype: mxfp4
    symmetric: true
    method: minmax