Update README.md
Browse files
change the model name
README.md
CHANGED
|
@@ -40,7 +40,7 @@ python3 internal_scripts/quantize_quark.py \
|
|
| 40 |
--attention_dtype fp8 \
|
| 41 |
--exclude_layers $exclude_layers \
|
| 42 |
--num_calib_data 512 \
|
| 43 |
-
--output_dir amd/gpt-
|
| 44 |
--model_export hf_format \
|
| 45 |
--multi_gpu
|
| 46 |
```
|
|
@@ -61,7 +61,7 @@ The model was evaluated on AIME25 and GPQA Diamond benchmarks with `medium` reas
|
|
| 61 |
</td>
|
| 62 |
<td><strong>gpt-oss-120b </strong>
|
| 63 |
</td>
|
| 64 |
-
<td><strong>gpt-
|
| 65 |
</td>
|
| 66 |
<td><strong>Recovery</strong>
|
| 67 |
</td>
|
|
@@ -69,21 +69,21 @@ The model was evaluated on AIME25 and GPQA Diamond benchmarks with `medium` reas
|
|
| 69 |
<tr>
|
| 70 |
<td>AIME25
|
| 71 |
</td>
|
| 72 |
-
<td>78.
|
| 73 |
</td>
|
| 74 |
-
<td>
|
| 75 |
</td>
|
| 76 |
-
<td>
|
| 77 |
</td>
|
| 78 |
</tr>
|
| 79 |
<tr>
|
| 80 |
<td>GPQA
|
| 81 |
</td>
|
| 82 |
-
<td>71.
|
| 83 |
</td>
|
| 84 |
-
<td>71.
|
| 85 |
</td>
|
| 86 |
-
<td>
|
| 87 |
</td>
|
| 88 |
</tr>
|
| 89 |
</table>
|
|
@@ -94,7 +94,7 @@ The results of AIME25 and GPQA Diamond were obtained using [gpt_oss.evals](https
|
|
| 94 |
|
| 95 |
#### Launching server
|
| 96 |
```
|
| 97 |
-
vllm serve amd/gpt-
|
| 98 |
--tensor_parallel_size 2 \
|
| 99 |
--gpu-memory-utilization 0.90 \
|
| 100 |
--no-enable-prefix-caching \
|
|
@@ -104,7 +104,7 @@ vllm serve amd/gpt-oss120b-moe_w-mxfp4-a-fp8-attn_ptpc-kv-soft_fp8 \
|
|
| 104 |
|
| 105 |
#### Evaluating model in a new terminal
|
| 106 |
```
|
| 107 |
-
python -m gpt_oss.evals --model /shareddata/amd/gpt-
|
| 108 |
```
|
| 109 |
|
| 110 |
# License
|
|
|
|
| 40 |
--attention_dtype fp8 \
|
| 41 |
--exclude_layers $exclude_layers \
|
| 42 |
--num_calib_data 512 \
|
| 43 |
+
--output_dir amd/gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn \
|
| 44 |
--model_export hf_format \
|
| 45 |
--multi_gpu
|
| 46 |
```
|
|
|
|
| 61 |
</td>
|
| 62 |
<td><strong>gpt-oss-120b </strong>
|
| 63 |
</td>
|
| 64 |
+
<td><strong>gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn(this model)</strong>
|
| 65 |
</td>
|
| 66 |
<td><strong>Recovery</strong>
|
| 67 |
</td>
|
|
|
|
| 69 |
<tr>
|
| 70 |
<td>AIME25
|
| 71 |
</td>
|
| 72 |
+
<td>78.61
|
| 73 |
</td>
|
| 74 |
+
<td>77.08
|
| 75 |
</td>
|
| 76 |
+
<td>98.06%
|
| 77 |
</td>
|
| 78 |
</tr>
|
| 79 |
<tr>
|
| 80 |
<td>GPQA
|
| 81 |
</td>
|
| 82 |
+
<td>71.21
|
| 83 |
</td>
|
| 84 |
+
<td>71.16
|
| 85 |
</td>
|
| 86 |
+
<td>99.93%
|
| 87 |
</td>
|
| 88 |
</tr>
|
| 89 |
</table>
|
|
|
|
| 94 |
|
| 95 |
#### Launching server
|
| 96 |
```
|
| 97 |
+
vllm serve amd/gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn \
|
| 98 |
--tensor_parallel_size 2 \
|
| 99 |
--gpu-memory-utilization 0.90 \
|
| 100 |
--no-enable-prefix-caching \
|
|
|
|
| 104 |
|
| 105 |
#### Evaluating model in a new terminal
|
| 106 |
```
|
| 107 |
+
python -m gpt_oss.evals --model /shareddata/amd/gpt-oss-120b-w-mxfp4-a-fp8-qkvo-ptpc-fp8-kv-fp8-fp8attn --eval aime25,gpqa --reasoning-effort medium --n-threads 128
|
| 108 |
```
|
| 109 |
|
| 110 |
# License
|