Update README.md
Browse files
README.md
CHANGED
|
@@ -200,7 +200,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
|
|
| 200 |
</td>
|
| 201 |
</tr>
|
| 202 |
<tr>
|
| 203 |
-
<td>GSM-8K-cot (8-shot, strict-match)
|
| 204 |
</td>
|
| 205 |
<td>82.03
|
| 206 |
</td>
|
|
@@ -230,7 +230,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
|
|
| 230 |
</td>
|
| 231 |
</tr>
|
| 232 |
<tr>
|
| 233 |
-
<td>TruthfulQA (0-shot)
|
| 234 |
</td>
|
| 235 |
<td>54.04
|
| 236 |
</td>
|
|
@@ -283,6 +283,7 @@ lm_eval \
|
|
| 283 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
|
| 284 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
| 285 |
--apply_chat_template \
|
|
|
|
| 286 |
--num_fewshot 8 \
|
| 287 |
--batch_size auto
|
| 288 |
```
|
|
@@ -312,7 +313,7 @@ lm_eval \
|
|
| 312 |
lm_eval \
|
| 313 |
--model vllm \
|
| 314 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
|
| 315 |
-
--tasks
|
| 316 |
--num_fewshot 0 \
|
| 317 |
--batch_size auto
|
| 318 |
```
|
|
|
|
| 200 |
</td>
|
| 201 |
</tr>
|
| 202 |
<tr>
|
| 203 |
+
<td>GSM-8K-cot (CoT, 8-shot, strict-match)
|
| 204 |
</td>
|
| 205 |
<td>82.03
|
| 206 |
</td>
|
|
|
|
| 230 |
</td>
|
| 231 |
</tr>
|
| 232 |
<tr>
|
| 233 |
+
<td>TruthfulQA (0-shot, mc2)
|
| 234 |
</td>
|
| 235 |
<td>54.04
|
| 236 |
</td>
|
|
|
|
| 283 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
|
| 284 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
| 285 |
--apply_chat_template \
|
| 286 |
+
--fewshot_as_multiturn \
|
| 287 |
--num_fewshot 8 \
|
| 288 |
--batch_size auto
|
| 289 |
```
|
|
|
|
| 313 |
lm_eval \
|
| 314 |
--model vllm \
|
| 315 |
--model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
|
| 316 |
+
--tasks truthfulqa \
|
| 317 |
--num_fewshot 0 \
|
| 318 |
--batch_size auto
|
| 319 |
```
|