jiaxwang committed on
Commit
8f07720
·
verified ·
1 Parent(s): add62b8

Update README.md

Browse files

change the model name

Files changed (1) hide show
  1. README.md +10 -10
README.md CHANGED
@@ -40,7 +40,7 @@ python3 internal_scripts/quantize_quark.py \
40
  --attention_dtype fp8 \
41
  --exclude_layers $exclude_layers \
42
  --num_calib_data 512 \
43
- --output_dir amd/gpt-oss120b-moe_w-mxfp4-a-fp8-attn_ptpc-kv-soft_fp8 \
44
  --model_export hf_format \
45
  --multi_gpu
46
  ```
@@ -61,7 +61,7 @@ The model was evaluated on AIME25 and GPQA Diamond benchmarks with `medium` reas
61
  </td>
62
  <td><strong>gpt-oss-120b </strong>
63
  </td>
64
- <td><strong>gpt-oss120b-moe_w-mxfp4-a-fp8-attn_ptpc-kv-soft_fp8(this model)</strong>
65
  </td>
66
  <td><strong>Recovery</strong>
67
  </td>
@@ -69,21 +69,21 @@ The model was evaluated on AIME25 and GPQA Diamond benchmarks with `medium` reas
69
  <tr>
70
  <td>AIME25
71
  </td>
72
- <td>78.47
73
  </td>
74
- <td>78.33
75
  </td>
76
- <td>99.82%
77
  </td>
78
  </tr>
79
  <tr>
80
  <td>GPQA
81
  </td>
82
- <td>71.86
83
  </td>
84
- <td>71.86
85
  </td>
86
- <td>100.00%
87
  </td>
88
  </tr>
89
  </table>
@@ -94,7 +94,7 @@ The results of AIME25 and GPQA Diamond were obtained using [gpt_oss.evals](https
94
 
95
  #### Launching server
96
  ```
97
- vllm serve amd/gpt-oss120b-moe_w-mxfp4-a-fp8-attn_ptpc-kv-soft_fp8 \
98
  --tensor_parallel_size 2 \
99
  --gpu-memory-utilization 0.90 \
100
  --no-enable-prefix-caching \
@@ -104,7 +104,7 @@ vllm serve amd/gpt-oss120b-moe_w-mxfp4-a-fp8-attn_ptpc-kv-soft_fp8 \
104
 
105
  #### Evaluating model in a new terminal
106
  ```
107
- python -m gpt_oss.evals --model /shareddata/amd/gpt-oss120b-moe_w-mxfp4-a-fp8-attn_ptpc-kv-soft_fp8 --eval aime25,gpqa --reasoning-effort medium --n-threads 128
108
  ```
109
 
110
  # License
 
40
  --attention_dtype fp8 \
41
  --exclude_layers $exclude_layers \
42
  --num_calib_data 512 \
43
+ --output_dir amd/gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn \
44
  --model_export hf_format \
45
  --multi_gpu
46
  ```
 
61
  </td>
62
  <td><strong>gpt-oss-120b </strong>
63
  </td>
64
+ <td><strong>gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn (this model)</strong>
65
  </td>
66
  <td><strong>Recovery</strong>
67
  </td>
 
69
  <tr>
70
  <td>AIME25
71
  </td>
72
+ <td>78.61
73
  </td>
74
+ <td>77.08
75
  </td>
76
+ <td>98.06%
77
  </td>
78
  </tr>
79
  <tr>
80
  <td>GPQA
81
  </td>
82
+ <td>71.21
83
  </td>
84
+ <td>71.16
85
  </td>
86
+ <td>99.93%
87
  </td>
88
  </tr>
89
  </table>
 
94
 
95
  #### Launching server
96
  ```
97
+ vllm serve amd/gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn \
98
  --tensor_parallel_size 2 \
99
  --gpu-memory-utilization 0.90 \
100
  --no-enable-prefix-caching \
 
104
 
105
  #### Evaluating model in a new terminal
106
  ```
107
+ python -m gpt_oss.evals --model /shareddata/amd/gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn --eval aime25,gpqa --reasoning-effort medium --n-threads 128
108
  ```
109
 
110
  # License