Spaces:
Running
Running
| [ | |
| { | |
| "benchmark": "ace", | |
| "model_count": 12 | |
| }, | |
| { | |
| "benchmark": "apex-agents", | |
| "model_count": 20 | |
| }, | |
| { | |
| "benchmark": "apex-v1", | |
| "model_count": 10 | |
| }, | |
| { | |
| "benchmark": "appworld_test_normal", | |
| "model_count": 3 | |
| }, | |
| { | |
| "benchmark": "bfcl", | |
| "model_count": 109 | |
| }, | |
| { | |
| "benchmark": "browsecompplus", | |
| "model_count": 3 | |
| }, | |
| { | |
| "benchmark": "global-mmlu-lite", | |
| "model_count": 27 | |
| }, | |
| { | |
| "benchmark": "helm_capabilities", | |
| "model_count": 61 | |
| }, | |
| { | |
| "benchmark": "helm_classic", | |
| "model_count": 67 | |
| }, | |
| { | |
| "benchmark": "helm_instruct", | |
| "model_count": 4 | |
| }, | |
| { | |
| "benchmark": "helm_lite", | |
| "model_count": 91 | |
| }, | |
| { | |
| "benchmark": "helm_mmlu", | |
| "model_count": 79 | |
| }, | |
| { | |
| "benchmark": "hfopenllm_v2", | |
| "model_count": 4493 | |
| }, | |
| { | |
| "benchmark": "la_leaderboard", | |
| "model_count": 5 | |
| }, | |
| { | |
| "benchmark": "livecodebenchpro", | |
| "model_count": 27 | |
| }, | |
| { | |
| "benchmark": "reward-bench", | |
| "model_count": 328 | |
| }, | |
| { | |
| "benchmark": "swe-bench", | |
| "model_count": 3 | |
| }, | |
| { | |
| "benchmark": "tau-bench-2_airline", | |
| "model_count": 3 | |
| }, | |
| { | |
| "benchmark": "tau-bench-2_retail", | |
| "model_count": 3 | |
| }, | |
| { | |
| "benchmark": "tau-bench-2_telecom", | |
| "model_count": 3 | |
| }, | |
| { | |
| "benchmark": "terminal-bench-2.0", | |
| "model_count": 37 | |
| }, | |
| { | |
| "benchmark": "theory_of_mind", | |
| "model_count": 1 | |
| } | |
| ] |