krishnateja95 commited on
Commit
354a81c
·
verified ·
1 Parent(s): 1231910

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -20
README.md CHANGED
@@ -131,38 +131,47 @@ The model was evaluated on the OpenLLMv1 leaderboard task, using [lm-evaluation-
131
  <details>
132
  <summary>Evaluation details</summary>
133
 
134
- **lm-evaluation-harness**
135
  ```
136
  lm_eval \
137
  --model vllm \
138
- --model_args pretrained="nm-testing/Qwen3-14B-FP8-block",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=4,gpu_memory_utilization=0.8,enable_chunked_prefill=True \
139
  --tasks openllm \
140
  --write_out \
141
  --batch_size auto \
142
- --output_path output_dir \
143
  --show_config
144
  ```
145
 
146
- **lighteval**
147
-
148
- lighteval_model_arguments.yaml
149
- ```yaml
150
- model_parameters:
151
- model_name: nm-testing/Qwen3-14B-FP8-block
152
- dtype: auto
153
- gpu_memory_utilization: 0.9
154
- generation_parameters:
155
- temperature: 0.6
156
- min_p: 0.0
157
- top_p: 0.95
158
- top_k: 20
159
- max_new_tokens: 32768
160
  ```
161
 
 
 
 
162
  ```
163
- lighteval vllm \
164
- --model_args lighteval_model_arguments.yaml \
165
- --tasks lighteval|aime25|0 \
 
 
 
 
 
 
 
166
  ```
167
 
168
 
 
131
  <details>
132
  <summary>Evaluation details</summary>
133
 
134
+ **OpenLLM V1**
135
  ```
136
  lm_eval \
137
  --model vllm \
138
+ --model_args pretrained=$model,dtype=auto,add_bos_token=True,max_model_len=16384,tensor_parallel_size=4,gpu_memory_utilization=0.9,enable_chunked_prefill=True,trust_remote_code=True \
139
  --tasks openllm \
140
  --write_out \
141
  --batch_size auto \
142
+ --output_path $output_path/openllm.json \
143
  --show_config
144
  ```
145
 
146
+
147
+ **OpenLLM V2**
148
+ ```
149
+ lm_eval \
150
+ --model vllm \
151
+ --model_args pretrained=$model,dtype=auto,add_bos_token=False,max_model_len=16384,tensor_parallel_size=4,gpu_memory_utilization=0.7,disable_log_stats=True,enable_chunked_prefill=True,trust_remote_code=True \
152
+ --tasks leaderboard \
153
+ --apply_chat_template \
154
+ --fewshot_as_multiturn \
155
+ --write_out \
156
+ --batch_size auto \
157
+ --output_path $output_path/leaderboard.json \
158
+ --show_config
 
159
  ```
160
 
161
+
162
+ **Coding Benchmarks**
163
+
164
  ```
165
+ evalplus.evaluate --model $model \
166
+ --dataset "humaneval" \
167
+ --backend vllm \
168
+ --tp 4 \
169
+ --greedy
170
+ evalplus.evaluate --model $model \
171
+ --dataset "mbpp" \
172
+ --backend vllm \
173
+ --tp 4 \
174
+ --greedy
175
  ```
176
 
177