Update README.md
Browse files
README.md
CHANGED
|
@@ -131,38 +131,47 @@ The model was evaluated on the OpenLLMv1 leaderboard task, using [lm-evaluation-
|
|
| 131 |
<details>
|
| 132 |
<summary>Evaluation details</summary>
|
| 133 |
|
| 134 |
-
**
|
| 135 |
```
|
| 136 |
lm_eval \
|
| 137 |
--model vllm \
|
| 138 |
-
--model_args pretrained=
|
| 139 |
--tasks openllm \
|
| 140 |
--write_out \
|
| 141 |
--batch_size auto \
|
| 142 |
-
--output_path
|
| 143 |
--show_config
|
| 144 |
```
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
max_new_tokens: 32768
|
| 160 |
```
|
| 161 |
|
|
|
|
|
|
|
|
|
|
| 162 |
```
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
```
|
| 167 |
|
| 168 |
|
|
|
|
<details>
<summary>Evaluation details</summary>

**Openllm V1**

```
lm_eval \
  --model vllm \
  --model_args pretrained=$model,dtype=auto,add_bos_token=True,max_model_len=16384,tensor_parallel_size=4,gpu_memory_utilization=0.9,enable_chunked_prefill=True,trust_remote_code=True \
  --tasks openllm \
  --write_out \
  --batch_size auto \
  --output_path $output_path/openllm.json \
  --show_config
```

**Openllm V2**

```
lm_eval \
  --model vllm \
  --model_args pretrained=$model,dtype=auto,add_bos_token=False,max_model_len=16384,tensor_parallel_size=4,gpu_memory_utilization=0.7,disable_log_stats=True,enable_chunked_prefill=True,trust_remote_code=True \
  --tasks leaderboard \
  --apply_chat_template \
  --fewshot_as_multiturn \
  --write_out \
  --batch_size auto \
  --output_path $output_path/leaderboard.json \
  --show_config
```

**Coding Benchmarks**

```
evalplus.evaluate --model $model \
  --dataset "humaneval" \
  --backend vllm \
  --tp 4 \
  --greedy

evalplus.evaluate --model $model \
  --dataset "mbpp" \
  --backend vllm \
  --tp 4 \
  --greedy
```