ichbinblau committed on
Commit
6d742ce
·
verified ·
1 Parent(s): b1e3dae

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -6
README.md CHANGED
@@ -44,7 +44,7 @@ python3 quantize_quark.py --model_dir $MODEL_DIR \
44
  --skip_evaluation \
45
  --multi_gpu \
46
  --model_export hf_format \
47
- --output_dir amd/DeepSeek-R1-0528-MXFP4-V2
48
  ```
49
 
50
  # Deployment
@@ -61,9 +61,9 @@ The model was evaluated on AIME24 and GSM8K benchmarks using the [lm-evaluation
61
  <tr>
62
  <td><strong>Benchmark</strong>
63
  </td>
64
- <td><strong>DeepSeek-R1-0528-MXFP4-V2 (non MTP) </strong>
65
  </td>
66
- <td><strong>DeepSeek-R1-0528-MXFP4-V2 (MTP=3)</strong>
67
  </td>
68
  </tr>
69
  <tr>
@@ -91,7 +91,7 @@ The results of AIME24 and GSM8K were obtained using a forked [lm-evaluation-harne
91
  ### Launch Server
92
  ```
93
  #!/bin/bash
94
- MODEL=/models/amd/DeepSeek-R1-0528-MXFP4-V2
95
  LOG="sglang-serving.log"
96
 
97
  SGLANG_AITER_MLA_PERSIST=1 \
@@ -111,7 +111,7 @@ python3 -m sglang.launch_server \
111
  ### AIME24
112
  ```
113
  lm_eval --model local-completions \
114
- --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-V2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
115
  --tasks aime24 \
116
  --num_fewshot 0 \
117
  --gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
@@ -121,7 +121,7 @@ lm_eval --model local-completions \
121
  ### GSM8K
122
  ```
123
  lm_eval --model local-completions \
124
- --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-V2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=256,max_retries=10,max_gen_toks=2048,tokenized_requests=False \
125
  --tasks gsm8k \
126
  --num_fewshot 5 \
127
  --batch_size auto 2>&1 | tee gsm8k.log
 
44
  --skip_evaluation \
45
  --multi_gpu \
46
  --model_export hf_format \
47
+ --output_dir amd/DeepSeek-R1-0528-MXFP4-v2
48
  ```
49
 
50
  # Deployment
 
61
  <tr>
62
  <td><strong>Benchmark</strong>
63
  </td>
64
+ <td><strong>DeepSeek-R1-0528-MXFP4-v2 (non MTP) </strong>
65
  </td>
66
+ <td><strong>DeepSeek-R1-0528-MXFP4-v2 (MTP=3)</strong>
67
  </td>
68
  </tr>
69
  <tr>
 
91
  ### Launch Server
92
  ```
93
  #!/bin/bash
94
+ MODEL=/models/amd/DeepSeek-R1-0528-MXFP4-v2
95
  LOG="sglang-serving.log"
96
 
97
  SGLANG_AITER_MLA_PERSIST=1 \
 
111
  ### AIME24
112
  ```
113
  lm_eval --model local-completions \
114
+ --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-v2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
115
  --tasks aime24 \
116
  --num_fewshot 0 \
117
  --gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
 
121
  ### GSM8K
122
  ```
123
  lm_eval --model local-completions \
124
+ --model_args model=/models/amd/DeepSeek-R1-0528-MXFP4-v2,base_url=http://0.0.0.0:8321/v1/completions,num_concurrent=256,max_retries=10,max_gen_toks=2048,tokenized_requests=False \
125
  --tasks gsm8k \
126
  --num_fewshot 5 \
127
  --batch_size auto 2>&1 | tee gsm8k.log