Update README.md
Browse files
README.md
CHANGED
|
@@ -43,4 +43,28 @@ However, as an open-source replica trained on a subset of data compared to the o
|
|
| 43 |
|
| 44 |
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
| 45 |
|--------------|------:|------|-----:|------|-----:|---|-----:|
|
| 46 |
-
|truthfulqa_mc2| 2|none | 0|acc |0.5182|± |0.0152|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
| 45 |
|--------------|------:|------|-----:|------|-----:|---|-----:|
|
| 46 |
+
|truthfulqa_mc2| 2|none | 0|acc |0.5182|± |0.0152|
|
| 47 |
+
|ai2_arc |N/A |none | 0|acc |0.7060|± |0.0073|
|
| 48 |
+
| | |none | 0|acc_norm|0.7049|± |0.0074|
|
| 49 |
+
| - arc_challenge | 1|none | 0|acc |0.5000|± |0.0146|
|
| 50 |
+
| | |none | 0|acc_norm|0.5299|± |0.0146|
|
| 51 |
+
| - arc_easy | 1|none | 0|acc |0.8077|± |0.0081|
|
| 52 |
+
| | |none | 0|acc_norm|0.7912|± |0.0083|
|
| 53 |
+
|agieval_nous |N/A |none | 0|acc |0.3778|± |0.0093|
|
| 54 |
+
| | |none | 0|acc_norm|0.3574|± |0.0093|
|
| 55 |
+
| - agieval_aqua_rat | 1|none | 0|acc |0.2402|± |0.0269|
|
| 56 |
+
| | |none | 0|acc_norm|0.2205|± |0.0261|
|
| 57 |
+
| - agieval_logiqa_en | 1|none | 0|acc |0.3164|± |0.0182|
|
| 58 |
+
| | |none | 0|acc_norm|0.3656|± |0.0189|
|
| 59 |
+
| - agieval_lsat_ar | 1|none | 0|acc |0.2130|± |0.0271|
|
| 60 |
+
| | |none | 0|acc_norm|0.1913|± |0.0260|
|
| 61 |
+
| - agieval_lsat_lr | 1|none | 0|acc |0.4078|± |0.0218|
|
| 62 |
+
| | |none | 0|acc_norm|0.3647|± |0.0213|
|
| 63 |
+
| - agieval_lsat_rc | 1|none | 0|acc |0.4981|± |0.0305|
|
| 64 |
+
| | |none | 0|acc_norm|0.4498|± |0.0304|
|
| 65 |
+
| - agieval_sat_en | 1|none | 0|acc |0.6650|± |0.0330|
|
| 66 |
+
| | |none | 0|acc_norm|0.5922|± |0.0343|
|
| 67 |
+
| - agieval_sat_en_without_passage| 1|none | 0|acc |0.4612|± |0.0348|
|
| 68 |
+
| | |none | 0|acc_norm|0.3932|± |0.0341|
|
| 69 |
+
| - agieval_sat_math | 1|none | 0|acc |0.3273|± |0.0317|
|
| 70 |
+
| | |none | 0|acc_norm|0.2818|± |0.0304|
|