Update README.md
Browse files
README.md
CHANGED
|
@@ -532,6 +532,72 @@ outputs = llm.chat(messages, sampling_params=sampling_params)
|
|
| 532 |
print(outputs[0].outputs[0].text)
|
| 533 |
```
|
| 534 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
## Inference Performance
|
| 536 |
|
| 537 |
|
|
|
|
| 532 |
print(outputs[0].outputs[0].text)
|
| 533 |
```
|
| 534 |
|
| 535 |
+
## Accuracy
|
| 536 |
+
<table>
|
| 537 |
+
<thead>
|
| 538 |
+
<tr>
|
| 539 |
+
<th>Category</th>
|
| 540 |
+
<th>Metric</th>
|
| 541 |
+
<th>neuralmagic/Pixtral-Large-Instruct-2411-hf</th>
|
| 542 |
+
<th>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-dynamic</th>
|
| 543 |
+
<th>Recovery (%)</th>
|
| 544 |
+
</tr>
|
| 545 |
+
</thead>
|
| 546 |
+
<tbody>
|
| 547 |
+
<tr>
|
| 548 |
+
<td rowspan="6"><b>Vision</b></td>
|
| 549 |
+
<td>MMMU (val, CoT)<br><i>explicit_prompt_relaxed_correctness</i></td>
|
| 550 |
+
<td>63.56</td>
|
| 551 |
+
<td>63.44</td>
|
| 552 |
+
<td>99.81%</td>
|
| 553 |
+
</tr>
|
| 554 |
+
<tr>
|
| 555 |
+
<td>VQAv2 (val)<br><i>vqa_match</i></td>
|
| 556 |
+
<td>79.03</td>
|
| 557 |
+
<td>79.06</td>
|
| 558 |
+
<td>100.04%</td>
|
| 559 |
+
</tr>
|
| 560 |
+
<tr>
|
| 561 |
+
<td>DocVQA (val)<br><i>anls</i></td>
|
| 562 |
+
<td>89.55</td>
|
| 563 |
+
<td>89.63</td>
|
| 564 |
+
<td>100.09%</td>
|
| 565 |
+
</tr>
|
| 566 |
+
<tr>
|
| 567 |
+
<td>ChartQA (test, CoT)<br><i>anywhere_in_answer_relaxed_correctness</i></td>
|
| 568 |
+
<td>82.24</td>
|
| 569 |
+
<td>82.80</td>
|
| 570 |
+
<td>100.68%</td>
|
| 571 |
+
</tr>
|
| 572 |
+
<tr>
|
| 573 |
+
<td>MathVista (testmini, CoT)<br><i>explicit_prompt_relaxed_correctness</i></td>
|
| 574 |
+
<td>67.30</td>
|
| 575 |
+
<td>66.50</td>
|
| 576 |
+
<td>98.81%</td>
|
| 577 |
+
</tr>
|
| 578 |
+
<tr>
|
| 579 |
+
<td><b>Average Score</b></td>
|
| 580 |
+
<td><b>76.34</b></td>
|
| 581 |
+
<td><b>76.29</b></td>
|
| 582 |
+
<td><b>99.93%</b></td>
|
| 583 |
+
</tr>
|
| 584 |
+
<tr>
|
| 585 |
+
<td rowspan="2"><b>Text</b></td>
|
| 586 |
+
<td>MGSM (CoT)</td>
|
| 587 |
+
<td>76.05</td>
|
| 588 |
+
<td>75.58</td>
|
| 589 |
+
<td>99.38%</td>
|
| 590 |
+
</tr>
|
| 591 |
+
<tr>
|
| 592 |
+
<td>MMLU (5-shot)</td>
|
| 593 |
+
<td>82.80</td>
|
| 594 |
+
<td>82.74</td>
|
| 595 |
+
<td>99.93%</td>
|
| 596 |
+
</tr>
|
| 597 |
+
</tbody>
|
| 598 |
+
</table>
|
| 599 |
+
|
| 600 |
+
|
| 601 |
## Inference Performance
|
| 602 |
|
| 603 |
|