Spaces:
Running
Running
Update small.json
Browse files- small.json +11 -11
small.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
| 5 |
"MMLU": 36.97,
|
| 6 |
"ARC":60.94,
|
| 7 |
"WinoGrande": 46.88,
|
| 8 |
-
"
|
| 9 |
"CommonsenseQA": 49.15,
|
| 10 |
"Race": 37.81,
|
| 11 |
"MedMCQA": 22.61,
|
|
@@ -17,7 +17,7 @@
|
|
| 17 |
"MMLU": 9.99,
|
| 18 |
"ARC":15.84 ,
|
| 19 |
"WinoGrande": 40.96,
|
| 20 |
-
"
|
| 21 |
"CommonsenseQA": 31.13,
|
| 22 |
"Race": 34.91,
|
| 23 |
"MedMCQA": 4.7,
|
|
@@ -29,7 +29,7 @@
|
|
| 29 |
"MMLU": 17.52,
|
| 30 |
"ARC":23.93,
|
| 31 |
"WinoGrande": 16.10,
|
| 32 |
-
"
|
| 33 |
"CommonsenseQA": 27.46,
|
| 34 |
"Race": 14.32,
|
| 35 |
"MedMCQA": 4.57,
|
|
@@ -42,7 +42,7 @@
|
|
| 42 |
"MMLU": 9.22,
|
| 43 |
"ARC":14.95,
|
| 44 |
"WinoGrande": 14.76,
|
| 45 |
-
"
|
| 46 |
"CommonsenseQA": 9.01,
|
| 47 |
"Race": 16.19,
|
| 48 |
"MedMCQA": 1.68,
|
|
@@ -54,7 +54,7 @@
|
|
| 54 |
"MMLU": 9.21,
|
| 55 |
"ARC":13.5,
|
| 56 |
"WinoGrande": 16.97,
|
| 57 |
-
"
|
| 58 |
"CommonsenseQA": 11.41,
|
| 59 |
"Race": 14.35,
|
| 60 |
"MedMCQA": 1.86,
|
|
@@ -66,7 +66,7 @@
|
|
| 66 |
"MMLU": 8.54,
|
| 67 |
"ARC":13.18,
|
| 68 |
"WinoGrande": 6.16,
|
| 69 |
-
"
|
| 70 |
"CommonsenseQA": 13.10,
|
| 71 |
"Race": 13.61,
|
| 72 |
"MedMCQA": 2.07,
|
|
@@ -78,7 +78,7 @@
|
|
| 78 |
"MMLU": 9.66,
|
| 79 |
"ARC":14.69,
|
| 80 |
"WinoGrande": 11.52,
|
| 81 |
-
"
|
| 82 |
"CommonsenseQA": 9.01,
|
| 83 |
"Race": 12.76,
|
| 84 |
"MedMCQA": 3.19,
|
|
@@ -90,7 +90,7 @@
|
|
| 90 |
"MMLU": 8.94,
|
| 91 |
"ARC":13.31,
|
| 92 |
"WinoGrande": 12.23,
|
| 93 |
-
"
|
| 94 |
"CommonsenseQA": 6.06,
|
| 95 |
"Race": 16.7,
|
| 96 |
"MedMCQA": 2.07,
|
|
@@ -102,7 +102,7 @@
|
|
| 102 |
"MMLU": 7.40,
|
| 103 |
"ARC":11.83,
|
| 104 |
"WinoGrande": 12.47,
|
| 105 |
-
"
|
| 106 |
"CommonsenseQA": 7.61,
|
| 107 |
"Race": 13.61,
|
| 108 |
"MedMCQA": 1.25,
|
|
@@ -114,7 +114,7 @@
|
|
| 114 |
"MMLU": 6.94,
|
| 115 |
"ARC": 6.69,
|
| 116 |
"WinoGrande": 10.81,
|
| 117 |
-
"
|
| 118 |
"CommonsenseQA": 6.34,
|
| 119 |
"Race": 13.75,
|
| 120 |
"MedMCQA": 2.63,
|
|
@@ -126,7 +126,7 @@
|
|
| 126 |
"MMLU": 5.37,
|
| 127 |
"ARC":4.43,
|
| 128 |
"WinoGrande": 9.31,
|
| 129 |
-
"
|
| 130 |
"CommonsenseQA": 6.2,
|
| 131 |
"Race": 6.9,
|
| 132 |
"MedMCQA": 1.04,
|
|
|
|
| 5 |
"MMLU": 36.97,
|
| 6 |
"ARC":60.94,
|
| 7 |
"WinoGrande": 46.88,
|
| 8 |
+
"PIQA": 32.04,
|
| 9 |
"CommonsenseQA": 49.15,
|
| 10 |
"Race": 37.81,
|
| 11 |
"MedMCQA": 22.61,
|
|
|
|
| 17 |
"MMLU": 9.99,
|
| 18 |
"ARC":15.84 ,
|
| 19 |
"WinoGrande": 40.96,
|
| 20 |
+
"PIQA": 15.52,
|
| 21 |
"CommonsenseQA": 31.13,
|
| 22 |
"Race": 34.91,
|
| 23 |
"MedMCQA": 4.7,
|
|
|
|
| 29 |
"MMLU": 17.52,
|
| 30 |
"ARC":23.93,
|
| 31 |
"WinoGrande": 16.10,
|
| 32 |
+
"PIQA": 15.09,
|
| 33 |
"CommonsenseQA": 27.46,
|
| 34 |
"Race": 14.32,
|
| 35 |
"MedMCQA": 4.57,
|
|
|
|
| 42 |
"MMLU": 9.22,
|
| 43 |
"ARC":14.95,
|
| 44 |
"WinoGrande": 14.76,
|
| 45 |
+
"PIQA": 5.32,
|
| 46 |
"CommonsenseQA": 9.01,
|
| 47 |
"Race": 16.19,
|
| 48 |
"MedMCQA": 1.68,
|
|
|
|
| 54 |
"MMLU": 9.21,
|
| 55 |
"ARC":13.5,
|
| 56 |
"WinoGrande": 16.97,
|
| 57 |
+
"PIQA": 0.86,
|
| 58 |
"CommonsenseQA": 11.41,
|
| 59 |
"Race": 14.35,
|
| 60 |
"MedMCQA": 1.86,
|
|
|
|
| 66 |
"MMLU": 8.54,
|
| 67 |
"ARC":13.18,
|
| 68 |
"WinoGrande": 6.16,
|
| 69 |
+
"PIQA": 8.05,
|
| 70 |
"CommonsenseQA": 13.10,
|
| 71 |
"Race": 13.61,
|
| 72 |
"MedMCQA": 2.07,
|
|
|
|
| 78 |
"MMLU": 9.66,
|
| 79 |
"ARC":14.69,
|
| 80 |
"WinoGrande": 11.52,
|
| 81 |
+
"PIQA": 4.17,
|
| 82 |
"CommonsenseQA": 9.01,
|
| 83 |
"Race": 12.76,
|
| 84 |
"MedMCQA": 3.19,
|
|
|
|
| 90 |
"MMLU": 8.94,
|
| 91 |
"ARC":13.31,
|
| 92 |
"WinoGrande": 12.23,
|
| 93 |
+
"PIQA": 3.59,
|
| 94 |
"CommonsenseQA": 6.06,
|
| 95 |
"Race": 16.7,
|
| 96 |
"MedMCQA": 2.07,
|
|
|
|
| 102 |
"MMLU": 7.40,
|
| 103 |
"ARC":11.83,
|
| 104 |
"WinoGrande": 12.47,
|
| 105 |
+
"PIQA": 4.48,
|
| 106 |
"CommonsenseQA": 7.61,
|
| 107 |
"Race": 13.61,
|
| 108 |
"MedMCQA": 1.25,
|
|
|
|
| 114 |
"MMLU": 6.94,
|
| 115 |
"ARC": 6.69,
|
| 116 |
"WinoGrande": 10.81,
|
| 117 |
+
"PIQA": 4.31,
|
| 118 |
"CommonsenseQA": 6.34,
|
| 119 |
"Race": 13.75,
|
| 120 |
"MedMCQA": 2.63,
|
|
|
|
| 126 |
"MMLU": 5.37,
|
| 127 |
"ARC":4.43,
|
| 128 |
"WinoGrande": 9.31,
|
| 129 |
+
"PIQA": 2.16,
|
| 130 |
"CommonsenseQA": 6.2,
|
| 131 |
"Race": 6.9,
|
| 132 |
"MedMCQA": 1.04,
|