QuantTaskForce
/

openPangu-R-72B-2512-mxfp4

QuantTaskForce committed on May 26

Commit

62a3a8a

verified ·

1 Parent(s): 79a1b3e

Upload MXFP4 quantized OpenPangu model

Files changed (1) hide show

openpangu_best_practice.yaml ADDED Viewed

+# Quantization recipe for openPangu with MXFP4 (config_id: openpangu_mxfp4_e2m1).
+# Glob patterns are quoted uniformly so a leading '*' is never read as a YAML alias.
+apiversion: modelslim_v1
+spec:
+  # Two process entries are declared: flex_smooth_quant, then linear_quant.
+  process:
+    - type: flex_smooth_quant
+      proxy_quant_dtype: mxfp4
+      proxy_quant_axes: -1
+      enable_subgraph_type:
+        - up-down
+      include:
+        - '*'
+      exclude:
+        # NOTE(review): unlike the other layer patterns, this one has no '.' before
+        # the wildcard, so as a shell-style glob it would also match names such as
+        # model.layers.500 -- confirm this is intended.
+        - 'model.layers.50*'
+        - 'model.layers.1.*'
+        - 'model.layers.2.*'
+        - 'model.layers.3.*'
+        - 'model.layers.0.*'
+        - '*mlp.shared_experts*'
+    - type: linear_quant
+      # Activations and weights share the same settings: per-block, symmetric,
+      # minmax, mxfp4.
+      qconfig:
+        act:
+          scope: per_block
+          dtype: mxfp4
+          symmetric: true
+          method: minmax
+        weight:
+          scope: per_block
+          dtype: mxfp4
+          symmetric: true
+          method: minmax
+      include:
+        - '*'
+      # Same layer exclusions as the flex_smooth_quant entry, plus the attention
+      # and mlp.gate patterns.
+      exclude:
+        - 'model.layers.1.*'
+        - 'model.layers.2.*'
+        - 'model.layers.3.*'
+        - 'model.layers.0.*'
+        - '*attn*'
+        - '*mlp.shared_experts*'
+        - '*mlp.gate'
+        # NOTE(review): same missing '.' separator as in the first exclude list.
+        - 'model.layers.50*'
+  # Presumably the calibration data file, going by the file name -- confirm.
+  dataset: mix_calib.jsonl
+  save:
+    - type: ascendv1_saver
+      # NOTE(review): unit of part_file_size is not stated here -- confirm.
+      part_file_size: 4
+metadata:
+  config_id: openpangu_mxfp4_e2m1
+  score: 50.0
+  # Presumably 4-bit weights and 4-bit activations, matching the mxfp4 dtype above.
+  label:
+    w_bit: 4
+    a_bit: 4
+    is_sparse: false
+    kv_cache: false
+  verified_model_types: []
+  verified_tags: {}
+# Top-level preset; its act/weight values are identical to the linear_quant
+# qconfig above. Keep the two in sync when editing either one.
+default_mxfp4_e2m1:
+  act:
+    scope: per_block
+    dtype: mxfp4
+    symmetric: true
+    method: minmax
+  weight:
+    scope: per_block
+    dtype: mxfp4
+    symmetric: true
+    method: minmax