Update README.md
Browse files
README.md
CHANGED
|
@@ -58,8 +58,8 @@ Average: 75.9% without mmlu
|
|
| 58 |
### TruthfulQA
|
| 59 |
| Task |Version|Metric|Value| |Stderr|
|
| 60 |
|-------------|------:|------|----:|---|-----:|
|
| 61 |
-
|truthfulqa_mc| 1|mc1 |
|
| 62 |
-
| | |mc2 |
|
| 63 |
|
| 64 |
### BigBench Reasoning Test
|
| 65 |
|
|
@@ -85,8 +85,24 @@ Average: 75.9% without mmlu
|
|
| 85 |
| bigbench_tracking_shuffled_objects_five_objects| 0| multiple_choice_grade | 23.28 | _ | 1.20 |
|
| 86 |
| bigbench_tracking_shuffled_objects_seven_objects| 0| multiple_choice_grade | 19.37 | _ | 0.94 |
|
| 87 |
| bigbench_tracking_shuffled_objects_three_objects| 0| multiple_choice_grade | 59.33 | _ | 2.84 |
|
|
|
|
| 88 |
Average: 49.08%
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
### Training hyperparameters
|
| 92 |
|
|
|
|
| 58 |
### TruthfulQA
|
| 59 |
| Task |Version|Metric|Value| |Stderr|
|
| 60 |
|-------------|------:|------|----:|---|-----:|
|
| 61 |
+
|truthfulqa_mc| 1|mc1 |63.03|± | 1.68|
|
| 62 |
+
| | |mc2 |78.39|± | 1.37|
|
| 63 |
|
| 64 |
### BigBench Reasoning Test
|
| 65 |
|
|
|
|
| 85 |
| bigbench_tracking_shuffled_objects_five_objects| 0| multiple_choice_grade | 23.28 | _ | 1.20 |
|
| 86 |
| bigbench_tracking_shuffled_objects_seven_objects| 0| multiple_choice_grade | 19.37 | _ | 0.94 |
|
| 87 |
| bigbench_tracking_shuffled_objects_three_objects| 0| multiple_choice_grade | 59.33 | _ | 2.84 |
|
| 88 |
+
|
| 89 |
Average: 49.08%
|
| 90 |
|
| 91 |
+
### GPT4ALL
|
| 92 |
+
|
| 93 |
+
| Task |Version| Metric |Value | |Stderr|
|-------------|------:|--------|-----:|---|-----:|
|
| 94 |
+
|arc_challenge| 0|acc |0.6630| _ |0.0138|
|
| 95 |
+
| | |acc_norm|0.6826| _ |0.0136|
|
| 96 |
+
|arc_easy | 0|acc |0.8657| _ |0.0070|
|
| 97 |
+
| | |acc_norm|0.8081| _ |0.0081|
|
| 98 |
+
|boolq | 1|acc |0.8716| _ |0.0059|
|
| 99 |
+
|hellaswag | 0|acc |0.6960| _ |0.0046|
|
| 100 |
+
| | |acc_norm|0.8745| _ |0.0033|
|
| 101 |
+
|openbookqa | 0|acc |0.3920| _ |0.0219|
|
| 102 |
+
| | |acc_norm|0.4960| _ |0.0224|
|
| 103 |
+
|piqa | 0|acc |0.8303| _ |0.0088|
|
| 104 |
+
| | |acc_norm|0.8487| _ |0.0084|
|
| 105 |
+
|winogrande | 0|acc |0.8106| _ |0.0110|
|
| 106 |
|
| 107 |
### Training hyperparameters
|
| 108 |
|