jash404 committed on
Commit
5286c6a
·
verified ·
1 Parent(s): d157524

Add Qwen2-1.5B-Instruct base GSM8K test eval

Browse files
EVAL/gsm8k_test/Qwen2-1.5B-Instruct/base/args.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "Qwen/Qwen2-1.5B-Instruct",
3
+ "output_dir": "output/gsm8k_test_eval/Qwen2-1.5B-Instruct/base",
4
+ "max_examples": null,
5
+ "batch_size": 512,
6
+ "num_chains": 1,
7
+ "temperature": 0.9,
8
+ "max_prompt_length": 1024,
9
+ "max_completion_length": 768,
10
+ "seed": 7111994
11
+ }
EVAL/gsm8k_test/Qwen2-1.5B-Instruct/base/per_question.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
EVAL/gsm8k_test/Qwen2-1.5B-Instruct/base/per_question.txt ADDED
The diff for this file is too large to render. See raw diff
 
EVAL/gsm8k_test/Qwen2-1.5B-Instruct/base/summary.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_name": "Qwen/Qwen2-1.5B-Instruct",
3
+ "split": "gsm8k_test",
4
+ "num_examples": 1319,
5
+ "num_chains": 1,
6
+ "mean_metrics": {
7
+ "rewards/correctness_reward_func": 0.4200151630022744,
8
+ "rewards/int_reward_func": 0.46626231993934797,
9
+ "rewards/strict_format_reward_func": 0.10765731614859743,
10
+ "rewards/soft_format_reward_func": 0.49583017437452614,
11
+ "rewards/xmlcount_reward_func": 0.49757316153301623,
12
+ "reward": 1.987338135020357,
13
+ "accuracy": 0.2100075815011372,
14
+ "pass_at_n": 0.2100075815011372
15
+ },
16
+ "avg_of_n_accuracy_pct": 21.00075815011372,
17
+ "pass_at_n_pct": 21.00075815011372
18
+ }