Update display names
Browse files
- data/results.json +6 -6
- data/tasks.json +2 -2
data/results.json
CHANGED
|
@@ -268,7 +268,7 @@
|
|
| 268 |
},
|
| 269 |
"c4ai-command-r-plus": {
|
| 270 |
"config": {
|
| 271 |
-
"model_name": "
|
| 272 |
"model_sha": "https://huggingface.co/CohereForAI/c4ai-command-r-plus"
|
| 273 |
},
|
| 274 |
"results": {
|
|
@@ -324,7 +324,7 @@
|
|
| 324 |
},
|
| 325 |
"claude-3-5-sonnet-20241022": {
|
| 326 |
"config": {
|
| 327 |
-
"model_name": "
|
| 328 |
"model_sha": "https://www.anthropic.com/claude/sonnet",
|
| 329 |
"model_dtype": "torch.float16"
|
| 330 |
},
|
|
@@ -413,7 +413,7 @@
|
|
| 413 |
},
|
| 414 |
"gemini-1.5-flash": {
|
| 415 |
"config": {
|
| 416 |
-
"model_name": "
|
| 417 |
"model_sha": "https://deepmind.google/technologies/gemini/flash",
|
| 418 |
"model_dtype": "torch.float16"
|
| 419 |
},
|
|
@@ -502,7 +502,7 @@
|
|
| 502 |
},
|
| 503 |
"gemini-1.5-pro": {
|
| 504 |
"config": {
|
| 505 |
-
"model_name": "
|
| 506 |
"model_sha": "https://deepmind.google/technologies/gemini/pro",
|
| 507 |
"model_dtype": "torch.float16"
|
| 508 |
},
|
|
@@ -591,7 +591,7 @@
|
|
| 591 |
},
|
| 592 |
"gpt-4o": {
|
| 593 |
"config": {
|
| 594 |
-
"model_name": "
|
| 595 |
"model_sha": "https://openai.com/index/hello-gpt-4o",
|
| 596 |
"model_dtype": "torch.float16"
|
| 597 |
},
|
|
@@ -680,7 +680,7 @@
|
|
| 680 |
},
|
| 681 |
"gpt-4o-mini": {
|
| 682 |
"config": {
|
| 683 |
-
"model_name": "
|
| 684 |
"model_sha": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence",
|
| 685 |
"model_dtype": "torch.float16"
|
| 686 |
},
|
|
|
|
| 268 |
},
|
| 269 |
"c4ai-command-r-plus": {
|
| 270 |
"config": {
|
| 271 |
+
"model_name": "Command R+",
|
| 272 |
"model_sha": "https://huggingface.co/CohereForAI/c4ai-command-r-plus"
|
| 273 |
},
|
| 274 |
"results": {
|
|
|
|
| 324 |
},
|
| 325 |
"claude-3-5-sonnet-20241022": {
|
| 326 |
"config": {
|
| 327 |
+
"model_name": "Claude-3.5-Sonnet",
|
| 328 |
"model_sha": "https://www.anthropic.com/claude/sonnet",
|
| 329 |
"model_dtype": "torch.float16"
|
| 330 |
},
|
|
|
|
| 413 |
},
|
| 414 |
"gemini-1.5-flash": {
|
| 415 |
"config": {
|
| 416 |
+
"model_name": "Gemini-1.5-Flash",
|
| 417 |
"model_sha": "https://deepmind.google/technologies/gemini/flash",
|
| 418 |
"model_dtype": "torch.float16"
|
| 419 |
},
|
|
|
|
| 502 |
},
|
| 503 |
"gemini-1.5-pro": {
|
| 504 |
"config": {
|
| 505 |
+
"model_name": "Gemini-1.5-Pro",
|
| 506 |
"model_sha": "https://deepmind.google/technologies/gemini/pro",
|
| 507 |
"model_dtype": "torch.float16"
|
| 508 |
},
|
|
|
|
| 591 |
},
|
| 592 |
"gpt-4o": {
|
| 593 |
"config": {
|
| 594 |
+
"model_name": "GPT-4o",
|
| 595 |
"model_sha": "https://openai.com/index/hello-gpt-4o",
|
| 596 |
"model_dtype": "torch.float16"
|
| 597 |
},
|
|
|
|
| 680 |
},
|
| 681 |
"gpt-4o-mini": {
|
| 682 |
"config": {
|
| 683 |
+
"model_name": "GPT-4o-mini",
|
| 684 |
"model_sha": "https://openai.com/index/gpt-4o-mini-advancing-cost-efficient-intelligence",
|
| 685 |
"model_dtype": "torch.float16"
|
| 686 |
},
|
data/tasks.json
CHANGED
|
@@ -86,14 +86,14 @@
|
|
| 86 |
"mmmu_multiple_choice": {
|
| 87 |
"benchmark": "mmmu_multiple_choice",
|
| 88 |
"metric": "accuracy",
|
| 89 |
-
"display_name": "MMMU-
|
| 90 |
"type": "base",
|
| 91 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
| 92 |
},
|
| 93 |
"mmmu_open": {
|
| 94 |
"benchmark": "mmmu_open",
|
| 95 |
"metric": "accuracy",
|
| 96 |
-
"display_name": "MMMU-
|
| 97 |
"type": "base",
|
| 98 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
| 99 |
},
|
|
|
|
| 86 |
"mmmu_multiple_choice": {
|
| 87 |
"benchmark": "mmmu_multiple_choice",
|
| 88 |
"metric": "accuracy",
|
| 89 |
+
"display_name": "MMMU-MC",
|
| 90 |
"type": "base",
|
| 91 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
| 92 |
},
|
| 93 |
"mmmu_open": {
|
| 94 |
"benchmark": "mmmu_open",
|
| 95 |
"metric": "accuracy",
|
| 96 |
+
"display_name": "MMMU-OE",
|
| 97 |
"type": "base",
|
| 98 |
"source": "https://github.com/UKGovernmentBEIS/inspect_evals/tree/main/src/inspect_evals/mmmu"
|
| 99 |
},
|