Update README.md
Browse files
README.md
CHANGED
|
@@ -655,21 +655,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
| 655 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
| 656 |
<td>1.70</td>
|
| 657 |
<td>0.8</td>
|
| 658 |
-
<td>
|
| 659 |
<td>1.1</td>
|
| 660 |
-
<td>
|
| 661 |
<td>1.3</td>
|
| 662 |
-
<td>
|
| 663 |
</tr>
|
| 664 |
<tr>
|
| 665 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 666 |
<td>1.48</td>
|
| 667 |
<td>0.5</td>
|
| 668 |
-
<td>
|
| 669 |
<td>1.0</td>
|
| 670 |
-
<td>
|
| 671 |
<td>1.4</td>
|
| 672 |
-
<td>
|
| 673 |
</tr>
|
| 674 |
<tr>
|
| 675 |
<th rowspan="3" valign="top">H100x4</th>
|
|
@@ -686,21 +686,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
| 686 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
| 687 |
<td>1.61</td>
|
| 688 |
<td>1.7</td>
|
| 689 |
-
<td>
|
| 690 |
<td>2.6</td>
|
| 691 |
-
<td>
|
| 692 |
<td>3.2</td>
|
| 693 |
-
<td>
|
| 694 |
</tr>
|
| 695 |
<tr>
|
| 696 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 697 |
<td>1.33</td>
|
| 698 |
<td>1.4</td>
|
| 699 |
-
<td>
|
| 700 |
<td>2.2</td>
|
| 701 |
-
<td>
|
| 702 |
<td>2.7</td>
|
| 703 |
-
<td>
|
| 704 |
</tr>
|
| 705 |
</tbody>
|
| 706 |
</table>
|
|
|
|
| 655 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
| 656 |
<td>1.70</td>
|
| 657 |
<td>0.8</td>
|
| 658 |
+
<td>383</td>
|
| 659 |
<td>1.1</td>
|
| 660 |
+
<td>571</td>
|
| 661 |
<td>1.3</td>
|
| 662 |
+
<td>674</td>
|
| 663 |
</tr>
|
| 664 |
<tr>
|
| 665 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 666 |
<td>1.48</td>
|
| 667 |
<td>0.5</td>
|
| 668 |
+
<td>276</td>
|
| 669 |
<td>1.0</td>
|
| 670 |
+
<td>505</td>
|
| 671 |
<td>1.4</td>
|
| 672 |
+
<td>680</td>
|
| 673 |
</tr>
|
| 674 |
<tr>
|
| 675 |
<th rowspan="3" valign="top">H100x4</th>
|
|
|
|
| 686 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
| 687 |
<td>1.61</td>
|
| 688 |
<td>1.7</td>
|
| 689 |
+
<td>467</td>
|
| 690 |
<td>2.6</td>
|
| 691 |
+
<td>726</td>
|
| 692 |
<td>3.2</td>
|
| 693 |
+
<td>908</td>
|
| 694 |
</tr>
|
| 695 |
<tr>
|
| 696 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 697 |
<td>1.33</td>
|
| 698 |
<td>1.4</td>
|
| 699 |
+
<td>393</td>
|
| 700 |
<td>2.2</td>
|
| 701 |
+
<td>726</td>
|
| 702 |
<td>2.7</td>
|
| 703 |
+
<td>764</td>
|
| 704 |
</tr>
|
| 705 |
</tbody>
|
| 706 |
</table>
|