amd
/

DeepSeek-R1-0528-MXFP4-v2

8-bit precision

Model card | Files and versions

ichbinblau committed 18 days ago

Commit

6d742ce

·

verified ·

1 Parent(s): b1e3dae

Update README.md

Files changed (1) hide show

README.md +6 -6

README.md CHANGED Viewed

@@ -44,7 +44,7 @@ python3 quantize_quark.py --model_dir $MODEL_DIR \
                           --skip_evaluation \
                           --multi_gpu \
                           --model_export hf_format \
-                          --output_dir amd/DeepSeek-R1-0528-MXFP4-V2
 ```
 # Deployment
@@ -61,9 +61,9 @@ The model was evaluated on AIME24, and GSM8K benchmarks using the [lm-evaluation
   <tr>
    <td><strong>Benchmark</strong>
    </td>
-   <td><strong>DeepSeek-R1-0528-MXFP4-V2 (non MTP) </strong>
    </td>
-   <td><strong>DeepSeek-R1-0528-MXFP4-V2 (MTP=3)</strong>
    </td>
   </tr>
   <tr>
@@ -91,7 +91,7 @@ The results of AIME24 and GSM8K, were obtained using forked [lm-evaluation-harne
 ### Launch Server
 ```
 #!/bin/bash
-MODEL=/models/amd/DeepSeek-R1-0528-MXFP4-V2
 LOG="sglang-serving.log"
 SGLANG_AITER_MLA_PERSIST=1 \
@@ -111,7 +111,7 @@ python3 -m sglang.launch_server \
 ### AIME24
 ```
 lm_eval --model local-completions \
-    --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-V2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
     --tasks aime24 \
     --num_fewshot 0 \
     --gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
@@ -121,7 +121,7 @@ lm_eval --model local-completions \
 ### GSM8K
 ```
 lm_eval --model local-completions \
-    --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-V2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=256,max_retries=10,max_gen_toks=2048,tokenized_requests=False \
     --tasks gsm8k \
     --num_fewshot 5 \
     --batch_size auto 2>&1 | tee gsm8k.log

                           --skip_evaluation \
                           --multi_gpu \
                           --model_export hf_format \
+                          --output_dir amd/DeepSeek-R1-0528-MXFP4-v2
 ```
 # Deployment
   <tr>
    <td><strong>Benchmark</strong>
    </td>
+   <td><strong>DeepSeek-R1-0528-MXFP4-v2 (non MTP) </strong>
    </td>
+   <td><strong>DeepSeek-R1-0528-MXFP4-v2 (MTP=3)</strong>
    </td>
   </tr>
   <tr>
 ### Launch Server
 ```
 #!/bin/bash
+MODEL=/models/amd/DeepSeek-R1-0528-MXFP4-v2
 LOG="sglang-serving.log"
 SGLANG_AITER_MLA_PERSIST=1 \
 ### AIME24
 ```
 lm_eval --model local-completions \
+    --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-v2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
     --tasks aime24 \
     --num_fewshot 0 \
     --gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
 ### GSM8K
 ```
 lm_eval --model local-completions \
+    --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-v2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=256,max_retries=10,max_gen_toks=2048,tokenized_requests=False \
     --tasks gsm8k \
     --num_fewshot 5 \
     --batch_size auto 2>&1 | tee gsm8k.log