Update README.md
Browse files
README.md
CHANGED
|
@@ -298,7 +298,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
|
|
| 298 |
<th>Docstring Generation<br>prefill: 768 tokens<br>decode: 128 tokens</th>
|
| 299 |
<th>Code Fixing<br>prefill: 1024 tokens<br>decode: 1024 tokens</th>
|
| 300 |
<th>RAG<br>prefill: 1024 tokens<br>decode: 128 tokens</th>
|
| 301 |
-
<th>
|
| 302 |
<th>Multi-turn Chat<br>prefill: 512 tokens<br>decode: 256 tokens</th>
|
| 303 |
<th>Large Summarization<br>prefill: 4096 tokens<br>decode: 512 tokens</th>
|
| 304 |
</tr>
|
|
@@ -326,7 +326,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
|
|
| 326 |
<td>4.7</td>
|
| 327 |
</tr>
|
| 328 |
<tr>
|
| 329 |
-
<td>granite-3.1-2b-base-quantized.
|
| 330 |
<td>1.94</td>
|
| 331 |
<td>5.4</td>
|
| 332 |
<td>0.7</td>
|
|
@@ -360,7 +360,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
|
|
| 360 |
<td>4.5</td>
|
| 361 |
</tr>
|
| 362 |
<tr>
|
| 363 |
-
<td>granite-3.1-2b-base-quantized.
|
| 364 |
<td>1.87</td>
|
| 365 |
<td>5.1</td>
|
| 366 |
<td>0.7</td>
|
|
@@ -417,7 +417,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
|
|
| 417 |
<td>1.4</td>
|
| 418 |
</tr>
|
| 419 |
<tr>
|
| 420 |
-
<td>granite-3.1-2b-base-quantized.
|
| 421 |
<td>0.98</td>
|
| 422 |
<td>2.8</td>
|
| 423 |
<td>10.0</td>
|
|
@@ -451,7 +451,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
|
|
| 451 |
<td>1.7</td>
|
| 452 |
</tr>
|
| 453 |
<tr>
|
| 454 |
-
<td>granite-3.1-2b-base-quantized.
|
| 455 |
<td>0.95</td>
|
| 456 |
<td>3.7</td>
|
| 457 |
<td>11.4</td>
|
|
@@ -462,4 +462,3 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
|
|
| 462 |
<td>1.4</td>
|
| 463 |
</tr>
|
| 464 |
</table>
|
| 465 |
-
|
|
|
|
| 298 |
<th>Docstring Generation<br>prefill: 768 tokens<br>decode: 128 tokens</th>
|
| 299 |
<th>Code Fixing<br>prefill: 1024 tokens<br>decode: 1024 tokens</th>
|
| 300 |
<th>RAG<br>prefill: 1024 tokens<br>decode: 128 tokens</th>
|
| 301 |
+
<th>Instruction Following<br>prefill: 256 tokens<br>decode: 128 tokens</th>
|
| 302 |
<th>Multi-turn Chat<br>prefill: 512 tokens<br>decode: 256 tokens</th>
|
| 303 |
<th>Large Summarization<br>prefill: 4096 tokens<br>decode: 512 tokens</th>
|
| 304 |
</tr>
|
|
|
|
| 326 |
<td>4.7</td>
|
| 327 |
</tr>
|
| 328 |
<tr>
|
| 329 |
+
<td>granite-3.1-2b-base-quantized.w4a16</td>
|
| 330 |
<td>1.94</td>
|
| 331 |
<td>5.4</td>
|
| 332 |
<td>0.7</td>
|
|
|
|
| 360 |
<td>4.5</td>
|
| 361 |
</tr>
|
| 362 |
<tr>
|
| 363 |
+
<td>granite-3.1-2b-base-quantized.w4a16</td>
|
| 364 |
<td>1.87</td>
|
| 365 |
<td>5.1</td>
|
| 366 |
<td>0.7</td>
|
|
|
|
| 417 |
<td>1.4</td>
|
| 418 |
</tr>
|
| 419 |
<tr>
|
| 420 |
+
<td>granite-3.1-2b-base-quantized.w4a16</td>
|
| 421 |
<td>0.98</td>
|
| 422 |
<td>2.8</td>
|
| 423 |
<td>10.0</td>
|
|
|
|
| 451 |
<td>1.7</td>
|
| 452 |
</tr>
|
| 453 |
<tr>
|
| 454 |
+
<td>granite-3.1-2b-base-quantized.w4a16</td>
|
| 455 |
<td>0.95</td>
|
| 456 |
<td>3.7</td>
|
| 457 |
<td>11.4</td>
|
|
|
|
| 462 |
<td>1.4</td>
|
| 463 |
</tr>
|
| 464 |
</table>
|
|
|