nm-research committed on
Commit
4032746
·
verified ·
1 Parent(s): b89cdac

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +5 -6
README.md CHANGED
@@ -298,7 +298,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
298
  <th>Docstring Generation<br>prefill: 768 tokens<br>decode: 128 tokens</th>
299
  <th>Code Fixing<br>prefill: 1024 tokens<br>decode: 1024 tokens</th>
300
  <th>RAG<br>prefill: 1024 tokens<br>decode: 128 tokens</th>
301
- <th>baseion Following<br>prefill: 256 tokens<br>decode: 128 tokens</th>
302
  <th>Multi-turn Chat<br>prefill: 512 tokens<br>decode: 256 tokens</th>
303
  <th>Large Summarization<br>prefill: 4096 tokens<br>decode: 512 tokens</th>
304
  </tr>
@@ -326,7 +326,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
326
  <td>4.7</td>
327
  </tr>
328
  <tr>
329
- <td>granite-3.1-2b-base-quantized.w8a8</td>
330
  <td>1.94</td>
331
  <td>5.4</td>
332
  <td>0.7</td>
@@ -360,7 +360,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
360
  <td>4.5</td>
361
  </tr>
362
  <tr>
363
- <td>granite-3.1-2b-base-quantized.w8a8</td>
364
  <td>1.87</td>
365
  <td>5.1</td>
366
  <td>0.7</td>
@@ -417,7 +417,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
417
  <td>1.4</td>
418
  </tr>
419
  <tr>
420
- <td>granite-3.1-2b-base-quantized.w8a8</td>
421
  <td>0.98</td>
422
  <td>2.8</td>
423
  <td>10.0</td>
@@ -451,7 +451,7 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
451
  <td>1.7</td>
452
  </tr>
453
  <tr>
454
- <td>granite-3.1-2b-base-quantized.w8a8</td>
455
  <td>0.95</td>
456
  <td>3.7</td>
457
  <td>11.4</td>
@@ -462,4 +462,3 @@ guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http:/
462
  <td>1.4</td>
463
  </tr>
464
  </table>
465
-
 
298
  <th>Docstring Generation<br>prefill: 768 tokens<br>decode: 128 tokens</th>
299
  <th>Code Fixing<br>prefill: 1024 tokens<br>decode: 1024 tokens</th>
300
  <th>RAG<br>prefill: 1024 tokens<br>decode: 128 tokens</th>
301
+ <th>Instruction Following<br>prefill: 256 tokens<br>decode: 128 tokens</th>
302
  <th>Multi-turn Chat<br>prefill: 512 tokens<br>decode: 256 tokens</th>
303
  <th>Large Summarization<br>prefill: 4096 tokens<br>decode: 512 tokens</th>
304
  </tr>
 
326
  <td>4.7</td>
327
  </tr>
328
  <tr>
329
+ <td>granite-3.1-2b-base-quantized.w4a16</td>
330
  <td>1.94</td>
331
  <td>5.4</td>
332
  <td>0.7</td>
 
360
  <td>4.5</td>
361
  </tr>
362
  <tr>
363
+ <td>granite-3.1-2b-base-quantized.w4a16</td>
364
  <td>1.87</td>
365
  <td>5.1</td>
366
  <td>0.7</td>
 
417
  <td>1.4</td>
418
  </tr>
419
  <tr>
420
+ <td>granite-3.1-2b-base-quantized.w4a16</td>
421
  <td>0.98</td>
422
  <td>2.8</td>
423
  <td>10.0</td>
 
451
  <td>1.7</td>
452
  </tr>
453
  <tr>
454
+ <td>granite-3.1-2b-base-quantized.w4a16</td>
455
  <td>0.95</td>
456
  <td>3.7</td>
457
  <td>11.4</td>
 
462
  <td>1.4</td>
463
  </tr>
464
  </table>