Update README.md
Browse files
README.md
CHANGED
|
@@ -412,21 +412,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
| 412 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
| 413 |
<td>1.70</td>
|
| 414 |
<td>1.6</td>
|
| 415 |
-
<td>
|
| 416 |
<td>2.2</td>
|
| 417 |
-
<td>
|
| 418 |
<td>2.6</td>
|
| 419 |
-
<td>
|
| 420 |
</tr>
|
| 421 |
<tr>
|
| 422 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 423 |
<td>1.48</td>
|
| 424 |
<td>1.0</td>
|
| 425 |
-
<td>
|
| 426 |
<td>2.0</td>
|
| 427 |
-
<td>
|
| 428 |
<td>2.8</td>
|
| 429 |
-
<td>
|
| 430 |
</tr>
|
| 431 |
<tr>
|
| 432 |
<th rowspan="3" valign="top">H100x4</th>
|
|
@@ -443,21 +443,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
| 443 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
| 444 |
<td>1.61</td>
|
| 445 |
<td>3.4</td>
|
| 446 |
-
<td>
|
| 447 |
<td>5.2</td>
|
| 448 |
-
<td>
|
| 449 |
<td>6.4</td>
|
| 450 |
-
<td>
|
| 451 |
</tr>
|
| 452 |
<tr>
|
| 453 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 454 |
<td>1.33</td>
|
| 455 |
<td>2.8</td>
|
| 456 |
-
<td>
|
| 457 |
<td>4.4</td>
|
| 458 |
-
<td>
|
| 459 |
<td>5.4</td>
|
| 460 |
-
<td>
|
| 461 |
</tr>
|
| 462 |
</tbody>
|
| 463 |
</table>
|
|
|
|
| 412 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
| 413 |
<td>1.70</td>
|
| 414 |
<td>1.6</td>
|
| 415 |
+
<td>766</td>
|
| 416 |
<td>2.2</td>
|
| 417 |
+
<td>1142</td>
|
| 418 |
<td>2.6</td>
|
| 419 |
+
<td>1348</td>
|
| 420 |
</tr>
|
| 421 |
<tr>
|
| 422 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 423 |
<td>1.48</td>
|
| 424 |
<td>1.0</td>
|
| 425 |
+
<td>552</td>
|
| 426 |
<td>2.0</td>
|
| 427 |
+
<td>1010</td>
|
| 428 |
<td>2.8</td>
|
| 429 |
+
<td>1360</td>
|
| 430 |
</tr>
|
| 431 |
<tr>
|
| 432 |
<th rowspan="3" valign="top">H100x4</th>
|
|
|
|
| 443 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
| 444 |
<td>1.61</td>
|
| 445 |
<td>3.4</td>
|
| 446 |
+
<td>905</td>
|
| 447 |
<td>5.2</td>
|
| 448 |
+
<td>1406</td>
|
| 449 |
<td>6.4</td>
|
| 450 |
+
<td>1759</td>
|
| 451 |
</tr>
|
| 452 |
<tr>
|
| 453 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 454 |
<td>1.33</td>
|
| 455 |
<td>2.8</td>
|
| 456 |
+
<td>761</td>
|
| 457 |
<td>4.4</td>
|
| 458 |
+
<td>1228</td>
|
| 459 |
<td>5.4</td>
|
| 460 |
+
<td>1480</td>
|
| 461 |
</tr>
|
| 462 |
</tbody>
|
| 463 |
</table>
|