QuantTaskForce committed on
Commit
62a3a8a
·
verified ·
1 Parent(s): 79a1b3e

Upload MXFP4 quantized OpenPangu model

Browse files
Files changed (1) hide show
  1. openpangu_best_practice.yaml +65 -0
openpangu_best_practice.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ apiversion: modelslim_v1
2
+ spec:
3
+ process:
4
+ - type: flex_smooth_quant
5
+ proxy_quant_dtype: mxfp4
6
+ proxy_quant_axes: -1
7
+ enable_subgraph_type:
8
+ - up-down
9
+ include:
10
+ - '*'
11
+ exclude:
12
+ - model.layers.50*
13
+ - model.layers.1.*
14
+ - model.layers.2.*
15
+ - model.layers.3.*
16
+ - model.layers.0.*
17
+ - '*mlp.shared_experts*'
18
+ - type: linear_quant
19
+ qconfig:
20
+ act:
21
+ scope: per_block
22
+ dtype: mxfp4
23
+ symmetric: true
24
+ method: minmax
25
+ weight:
26
+ scope: per_block
27
+ dtype: mxfp4
28
+ symmetric: true
29
+ method: minmax
30
+ include:
31
+ - '*'
32
+ exclude:
33
+ - model.layers.1.*
34
+ - model.layers.2.*
35
+ - model.layers.3.*
36
+ - model.layers.0.*
37
+ - '*attn*'
38
+ - '*mlp.shared_experts*'
39
+ - '*mlp.gate'
40
+ - model.layers.50*
41
+ dataset: mix_calib.jsonl
42
+ save:
43
+ - type: ascendv1_saver
44
+ part_file_size: 4
45
+ metadata:
46
+ config_id: openpangu_mxfp4_e2m1
47
+ score: 50.0
48
+ label:
49
+ w_bit: 4
50
+ a_bit: 4
51
+ is_sparse: false
52
+ kv_cache: false
53
+ verified_model_types: []
54
+ verified_tags: {}
55
+ default_mxfp4_e2m1:
56
+ act:
57
+ scope: per_block
58
+ dtype: mxfp4
59
+ symmetric: true
60
+ method: minmax
61
+ weight:
62
+ scope: per_block
63
+ dtype: mxfp4
64
+ symmetric: true
65
+ method: minmax