Update README.md
Browse files
README.md
CHANGED
|
@@ -40,13 +40,13 @@ You can either perform the dequantization manually using this [conversion script
|
|
| 40 |
```
|
| 41 |
cd Quark/examples/torch/language_modeling/llm_ptq/
|
| 42 |
export exclude_layers="*mlp.gate.* *lm_head model.layers.61.eh_proj model.layers.61.shared_head.head model.layers.61.embed_tokens"
|
| 43 |
-
python3 quantize_quark.py --model_dir /shareddata/amd/DeepSeek-R1-0528-BF16 \
|
| 44 |
--quant_scheme mxfp4 \
|
| 45 |
--layer_quant_scheme '*self_attn*' ptpc_fp8 \
|
| 46 |
--exclude_layers $exclude_layers \
|
| 47 |
--skip_evaluation \
|
| 48 |
--model_export hf_format \
|
| 49 |
-
--output_dir
|
| 50 |
--multi_gpu
|
| 51 |
```
|
| 52 |
|
|
@@ -58,7 +58,7 @@ python3 quantize_quark.py --model_dir /shareddata/amd/DeepSeek-R1-0528-BF16 \
|
|
| 58 |
</td>
|
| 59 |
<td><strong>DeepSeek-R1-0528</strong>
|
| 60 |
</td>
|
| 61 |
-
<td><strong>DeepSeek-R1-0528-</strong>
|
| 62 |
</td>
|
| 63 |
</tr>
|
| 64 |
<tr>
|
|
|
|
| 40 |
```
|
| 41 |
cd Quark/examples/torch/language_modeling/llm_ptq/
|
| 42 |
export exclude_layers="*mlp.gate.* *lm_head model.layers.61.eh_proj model.layers.61.shared_head.head model.layers.61.embed_tokens"
|
| 43 |
+
python3 quantize_quark.py --model_dir /amd/DeepSeek-R1-0528-BF16 \
|
| 44 |
--quant_scheme mxfp4 \
|
| 45 |
--layer_quant_scheme '*self_attn*' ptpc_fp8 \
|
| 46 |
--exclude_layers $exclude_layers \
|
| 47 |
--skip_evaluation \
|
| 48 |
--model_export hf_format \
|
| 49 |
+
--output_dir amd/DeepSeek-R1-0528-MXFP4-MTP-MoEFP4 \
|
| 50 |
--multi_gpu
|
| 51 |
```
|
| 52 |
|
|
|
|
| 58 |
</td>
|
| 59 |
<td><strong>DeepSeek-R1-0528</strong>
|
| 60 |
</td>
|
| 61 |
+
<td><strong>DeepSeek-R1-0528-MXFP4-MTP-MoEFP4(this model)</strong>
|
| 62 |
</td>
|
| 63 |
</tr>
|
| 64 |
<tr>
|