# Quantization recipe using the modelslim_v1 schema (config_id:
# openpangu_mxfp4_e2m1).
# NOTE(review): the nesting below was restored from a copy whose indentation
# had been stripped; confirm against the upstream file before relying on it.
apiversion: modelslim_v1
spec:
  # Processing steps, listed in order. Each step names the module patterns it
  # applies to (include) and the ones it skips (exclude).
  process:
  # Step 1: flex_smooth_quant on every module ('*') except the excluded ones.
  - type: flex_smooth_quant
    proxy_quant_dtype: mxfp4
    proxy_quant_axes: -1  # presumably "last axis" -- TODO confirm
    enable_subgraph_type:
    - up-down
    include:
    - '*'
    exclude:
    - model.layers.50*
    - model.layers.1.*
    - model.layers.2.*
    - model.layers.3.*
    - model.layers.0.*
    - '*mlp.shared_experts*'
  # Step 2: linear_quant with mxfp4 per-block settings for both activations
  # and weights. Excludes the same layers as step 1 plus the attention and
  # mlp.gate patterns.
  - type: linear_quant
    qconfig:
      act:
        scope: per_block
        dtype: mxfp4
        symmetric: true
        method: minmax
      weight:
        scope: per_block
        dtype: mxfp4
        symmetric: true
        method: minmax
    include:
    - '*'
    exclude:
    - model.layers.1.*
    - model.layers.2.*
    - model.layers.3.*
    - model.layers.0.*
    - '*attn*'
    - '*mlp.shared_experts*'
    - '*mlp.gate'
    - model.layers.50*
  dataset: mix_calib.jsonl
  save:
  - type: ascendv1_saver
    part_file_size: 4  # unit is not stated in this file -- TODO confirm
metadata:
  config_id: openpangu_mxfp4_e2m1
  score: 50.0
  label:
    # presumably weight / activation bit widths -- confirm against the schema
    w_bit: 4
    a_bit: 4
    is_sparse: false
    kv_cache: false
  verified_model_types: []
  verified_tags: {}
# NOTE(review): identical in content to spec.process[1].qconfig above;
# presumably the shared default that qconfig was expanded from -- confirm
# whether the tool still reads this top-level key.
default_mxfp4_e2m1:
  act:
    scope: per_block
    dtype: mxfp4
    symmetric: true
    method: minmax
  weight:
    scope: per_block
    dtype: mxfp4
    symmetric: true
    method: minmax