haoyang-amd commited on
Commit
4d8af73
·
verified ·
1 Parent(s): 96f621b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -40,13 +40,13 @@ You can either perform the dequantization manually using this [conversion script
40
  ```
41
  cd Quark/examples/torch/language_modeling/llm_ptq/
42
  export exclude_layers="*mlp.gate.* *lm_head model.layers.61.eh_proj model.layers.61.shared_head.head model.layers.61.embed_tokens"
43
- python3 quantize_quark.py --model_dir /shareddata/amd/DeepSeek-R1-0528-BF16 \
44
  --quant_scheme mxfp4 \
45
  --layer_quant_scheme '*self_attn*' ptpc_fp8 \
46
  --exclude_layers $exclude_layers \
47
  --skip_evaluation \
48
  --model_export hf_format \
49
- --output_dir /shareddata/amd/DeepSeek-R1-0528-MoE-MTP-MXFP4-Attn-PTPC-FP8 \
50
  --multi_gpu
51
  ```
52
 
@@ -58,7 +58,7 @@ python3 quantize_quark.py --model_dir /shareddata/amd/DeepSeek-R1-0528-BF16 \
58
  </td>
59
  <td><strong>DeepSeek-R1-0528</strong>
60
  </td>
61
- <td><strong>DeepSeek-R1-0528-MTP-MoE-MXFP4-Attn-PTPC-FP8(this model)</strong>
62
  </td>
63
  </tr>
64
  <tr>
 
40
  ```
41
  cd Quark/examples/torch/language_modeling/llm_ptq/
42
  export exclude_layers="*mlp.gate.* *lm_head model.layers.61.eh_proj model.layers.61.shared_head.head model.layers.61.embed_tokens"
43
+ python3 quantize_quark.py --model_dir /amd/DeepSeek-R1-0528-BF16 \
44
  --quant_scheme mxfp4 \
45
  --layer_quant_scheme '*self_attn*' ptpc_fp8 \
46
  --exclude_layers $exclude_layers \
47
  --skip_evaluation \
48
  --model_export hf_format \
49
+ --output_dir amd/DeepSeek-R1-0528-MXFP4-MTP-MoEFP4 \
50
  --multi_gpu
51
  ```
52
 
 
58
  </td>
59
  <td><strong>DeepSeek-R1-0528</strong>
60
  </td>
61
+ <td><strong>DeepSeek-R1-0528-MXFP4-MTP-MoEFP4 (this model)</strong>
62
  </td>
63
  </tr>
64
  <tr>