Spaces:
Build error
Build error
update with gpt5 results
Browse files
data/leaderboard_json/afrobench_lite.json
CHANGED
|
@@ -22,7 +22,8 @@
|
|
| 22 |
"Claude 3.7 Sonnet": 58.8,
|
| 23 |
"Claude 4.5 Sonnet": 69.9,
|
| 24 |
"Gemini-2.5 Flash": 69.3,
|
| 25 |
-
"Gemini-2.5 Pro": 72.5
|
|
|
|
| 26 |
}
|
| 27 |
},
|
| 28 |
"Intent": {
|
|
@@ -48,7 +49,8 @@
|
|
| 48 |
"Claude 3.7 Sonnet": 72.2,
|
| 49 |
"Claude 4.5 Sonnet": 79.3,
|
| 50 |
"Gemini-2.5 Flash": 87.4,
|
| 51 |
-
"Gemini-2.5 Pro": 88.0
|
|
|
|
| 52 |
}
|
| 53 |
},
|
| 54 |
"MT(en/fr-xx)": {
|
|
@@ -74,7 +76,8 @@
|
|
| 74 |
"Claude 3.7 Sonnet": 42.9,
|
| 75 |
"Claude 4.5 Sonnet": 45.2,
|
| 76 |
"Gemini-2.5 Flash": 45.3,
|
| 77 |
-
"Gemini-2.5 Pro": 46.3
|
|
|
|
| 78 |
}
|
| 79 |
},
|
| 80 |
"MMLU": {
|
|
@@ -100,7 +103,8 @@
|
|
| 100 |
"Claude 3.7 Sonnet": 65.3,
|
| 101 |
"Claude 4.5 Sonnet": 74.0,
|
| 102 |
"Gemini-2.5 Flash": 67.3,
|
| 103 |
-
"Gemini-2.5 Pro": 77.4
|
|
|
|
| 104 |
}
|
| 105 |
},
|
| 106 |
"Math": {
|
|
@@ -126,7 +130,8 @@
|
|
| 126 |
"Claude 3.7 Sonnet": 33.9,
|
| 127 |
"Claude 4.5 Sonnet": 69.7,
|
| 128 |
"Gemini-2.5 Flash": 69.3,
|
| 129 |
-
"Gemini-2.5 Pro": 73.2
|
|
|
|
| 130 |
}
|
| 131 |
},
|
| 132 |
"Topic": {
|
|
@@ -152,7 +157,8 @@
|
|
| 152 |
"Claude 3.7 Sonnet": 84.5,
|
| 153 |
"Claude 4.5 Sonnet": 83.3,
|
| 154 |
"Gemini-2.5 Flash": 86.8,
|
| 155 |
-
"Gemini-2.5 Pro": 87.9
|
|
|
|
| 156 |
}
|
| 157 |
},
|
| 158 |
"RC": {
|
|
@@ -178,7 +184,8 @@
|
|
| 178 |
"Claude 3.7 Sonnet": 64.2,
|
| 179 |
"Claude 4.5 Sonnet": 72.8,
|
| 180 |
"Gemini-2.5 Flash": 41.6,
|
| 181 |
-
"Gemini-2.5 Pro": 76.4
|
|
|
|
| 182 |
}
|
| 183 |
}
|
| 184 |
}
|
|
|
|
| 22 |
"Claude 3.7 Sonnet": 58.8,
|
| 23 |
"Claude 4.5 Sonnet": 69.9,
|
| 24 |
"Gemini-2.5 Flash": 69.3,
|
| 25 |
+
"Gemini-2.5 Pro": 72.5,
|
| 26 |
+
"GPT-5 (Aug)": 83.3
|
| 27 |
}
|
| 28 |
},
|
| 29 |
"Intent": {
|
|
|
|
| 49 |
"Claude 3.7 Sonnet": 72.2,
|
| 50 |
"Claude 4.5 Sonnet": 79.3,
|
| 51 |
"Gemini-2.5 Flash": 87.4,
|
| 52 |
+
"Gemini-2.5 Pro": 88.0,
|
| 53 |
+
"GPT-5 (Aug)": 87.3
|
| 54 |
}
|
| 55 |
},
|
| 56 |
"MT(en/fr-xx)": {
|
|
|
|
| 76 |
"Claude 3.7 Sonnet": 42.9,
|
| 77 |
"Claude 4.5 Sonnet": 45.2,
|
| 78 |
"Gemini-2.5 Flash": 45.3,
|
| 79 |
+
"Gemini-2.5 Pro": 46.3,
|
| 80 |
+
"GPT-5 (Aug)": 44.8
|
| 81 |
}
|
| 82 |
},
|
| 83 |
"MMLU": {
|
|
|
|
| 103 |
"Claude 3.7 Sonnet": 65.3,
|
| 104 |
"Claude 4.5 Sonnet": 74.0,
|
| 105 |
"Gemini-2.5 Flash": 67.3,
|
| 106 |
+
"Gemini-2.5 Pro": 77.4,
|
| 107 |
+
"GPT-5 (Aug)": 83.3
|
| 108 |
}
|
| 109 |
},
|
| 110 |
"Math": {
|
|
|
|
| 130 |
"Claude 3.7 Sonnet": 33.9,
|
| 131 |
"Claude 4.5 Sonnet": 69.7,
|
| 132 |
"Gemini-2.5 Flash": 69.3,
|
| 133 |
+
"Gemini-2.5 Pro": 73.2,
|
| 134 |
+
"GPT-5 (Aug)": 73.7
|
| 135 |
}
|
| 136 |
},
|
| 137 |
"Topic": {
|
|
|
|
| 157 |
"Claude 3.7 Sonnet": 84.5,
|
| 158 |
"Claude 4.5 Sonnet": 83.3,
|
| 159 |
"Gemini-2.5 Flash": 86.8,
|
| 160 |
+
"Gemini-2.5 Pro": 87.9,
|
| 161 |
+
"GPT-5 (Aug)": 88.5
|
| 162 |
}
|
| 163 |
},
|
| 164 |
"RC": {
|
|
|
|
| 184 |
"Claude 3.7 Sonnet": 64.2,
|
| 185 |
"Claude 4.5 Sonnet": 72.8,
|
| 186 |
"Gemini-2.5 Flash": 41.6,
|
| 187 |
+
"Gemini-2.5 Pro": 76.4,
|
| 188 |
+
"GPT-5 (Aug)": 83.3
|
| 189 |
}
|
| 190 |
}
|
| 191 |
}
|