Spaces:
Runtime error
Runtime error
| ,model,score,scenario,source,aggragated_from,tag | |
| 0,gpt_4_turbo_2024_04_09,82.6,arena_hard,arena_hard_2404,[],holistic | |
| 1,gpt_4_0125_preview,78.0,arena_hard,arena_hard_2404,[],holistic | |
| 2,gemini_1.5_pro_api_preview,72.0,arena_hard,arena_hard_2404,[],holistic | |
| 3,yi_large,63.7,arena_hard,arena_hard_2404,[],holistic | |
| 4,claude_3_opus_20240229,60.4,arena_hard,arena_hard_2404,[],holistic | |
| 5,glm_4,55.7,arena_hard,arena_hard_2404,[],holistic | |
| 6,gpt_4_0314,50.0,arena_hard,arena_hard_2404,[],holistic | |
| 7,gemini_1.5_flash_api_preview,49.6,arena_hard,arena_hard_2404,[],holistic | |
| 8,claude_3_sonnet_20240229,46.8,arena_hard,arena_hard_2404,[],holistic | |
| 9,claude_3_haiku_20240307,41.5,arena_hard,arena_hard_2404,[],holistic | |
| 10,llama_3_70b_chat,41.1,arena_hard,arena_hard_2404,[],holistic | |
| 11,gpt_4_0613,37.9,arena_hard,arena_hard_2404,[],holistic | |
| 12,mistral_large_2402,37.7,arena_hard,arena_hard_2404,[],holistic | |
| 13,mixtral_8x22b_instruct_v0.1,36.4,arena_hard,arena_hard_2404,[],holistic | |
| 14,qwen1.5_72b_chat,36.1,arena_hard,arena_hard_2404,[],holistic | |
| 15,command_r_plus,33.1,arena_hard,arena_hard_2404,[],holistic | |
| 16,mistral_medium,31.9,arena_hard,arena_hard_2404,[],holistic | |
| 17,mistral_next,27.4,arena_hard,arena_hard_2404,[],holistic | |
| 18,gpt_3.5_turbo_0613,24.8,arena_hard,arena_hard_2404,[],holistic | |
| 19,claude_2.0,24.0,arena_hard,arena_hard_2404,[],holistic | |
| 20,dbrx_instructruct,23.9,arena_hard,arena_hard_2404,[],holistic | |
| 21,mixtral_8x7b_instruct_v0.1,23.4,arena_hard,arena_hard_2404,[],holistic | |
| 22,gpt_3.5_turbo_0125,23.3,arena_hard,arena_hard_2404,[],holistic | |
| 23,yi_34b_chat,23.1,arena_hard,arena_hard_2404,[],holistic | |
| 24,starling_lm_7b_beta,23.0,arena_hard,arena_hard_2404,[],holistic | |
| 25,claude_2.1,22.8,arena_hard,arena_hard_2404,[],holistic | |
| 26,snorkel_mistral_pairrm_dpo,20.7,arena_hard,arena_hard_2404,[],holistic | |
| 27,llama_3_8b_chat,20.6,arena_hard,arena_hard_2404,[],holistic | |
| 28,gpt_3.5_turbo_1106,18.9,arena_hard,arena_hard_2404,[],holistic | |
| 29,gpt_3.5_turbo_0301,18.1,arena_hard,arena_hard_2404,[],holistic | |
| 30,gemini_1.0_pro,17.8,arena_hard,arena_hard_2404,[],holistic | |
| 31,snowflake_arctic_instruct,17.6,arena_hard,arena_hard_2404,[],holistic | |
| 32,command_r,17.0,arena_hard,arena_hard_2404,[],holistic | |
| 33,phi_3_mini_128k_instruct,15.4,arena_hard,arena_hard_2404,[],holistic | |
| 34,tulu_2_dpo_70b,15.0,arena_hard,arena_hard_2404,[],holistic | |
| 35,starling_lm_7b_alpha,12.8,arena_hard,arena_hard_2404,[],holistic | |
| 36,mistral_7b_instruct,12.6,arena_hard,arena_hard_2404,[],holistic | |
| 37,gemma_1.1_7b_it,12.1,arena_hard,arena_hard_2404,[],holistic | |
| 38,llama_2_70b_chat,11.6,arena_hard,arena_hard_2404,[],holistic | |
| 39,vicuna_33b_v1.3,8.6,arena_hard,arena_hard_2404,[],holistic | |
| 40,gemma_7b_it,7.5,arena_hard,arena_hard_2404,[],holistic | |
| 41,llama_2_7b_chat,4.6,arena_hard,arena_hard_2404,[],holistic | |
| 42,gemma_1.1_2b_it,3.4,arena_hard,arena_hard_2404,[],holistic | |
| 43,gemma_2b_it,3.0,arena_hard,arena_hard_2404,[],holistic | |
| 0,gpt_4o_2024_05_13,64.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 1,claude_3_opus,63.5,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 2,gpt_4_turbo_2024_04_09,62.6,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 3,gemini_1.5_pro_api_0409,58.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 4,yi_large_preview,56.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 5,llama_3_70b_instruct,55.9,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 6,qwen_max_0428,55.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 7,claude_3_sonnet,54.0,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 8,reka_core_20240415,52.9,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 9,mammoth2_8x7b_plus,51.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 10,deepseek_v2,51.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 11,command_r_plus,51.4,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 12,yi_1.5_34b_chat,51.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 13,mistral_large,50.3,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 14,qwen1.5_72b_chat,48.3,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 15,mistral_medium,47.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 16,gemini_1.0_pro,46.4,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 17,reka_flash_20240226,46.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 18,mistral_small,46.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 19,llama_3_8b_instruct,45.6,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 20,command_r,45.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 21,qwen1.5_32b_chat,43.3,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 22,gpt_3.5_turbo_0125,43.0,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 23,claude_3_haiku,42.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 24,yi_34b_chat,42.6,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 25,mixtral_8x7b_instruct_v0.1,42.5,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 26,starling_lm_7b_beta,41.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 27,yi_1.5_9b_chat,40.9,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 28,gemma_1.1_7b_it,39.1,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 29,vicuna_33b_v1.3,38.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 30,llama_2_70b_chat,38.0,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 31,map_neo_instruct_v0.1,37.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 32,mistral_7b_instruct_v0.2,36.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 33,qwen1.5_7b_chat,35.5,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 34,reka_edge_20240208,32.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 35,zephyr_7b_beta,31.6,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 36,llama_2_7b_chat,30.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 37,yi_6b_chat,30.1,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 38,qwen1.5_moe_a2.7b_chat,29.1,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 39,gemma_1.1_2b_it,28.4,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 40,vicuna_7b_v1.5,27.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 41,olmo_7b_instruct,26.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 42,qwen1.5_4b_chat,24.6,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 43,jetmoe_8b_chat,24.3,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 44,mpt_7b_chat,23.8,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 45,llama_3_70b,54.0,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 46,qwen1.5_72b,41.9,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 47,yi_34b,47.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 48,qwen1.5_32b,41.0,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 49,mixtral_8x7b,40.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 50,llama_2_70b,41.6,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 51,qwen1.5_moe_a2.7b,33.5,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 52,qwen1.5_7b,33.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 53,llama_3_8b,31.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 54,mistral_7b,27.1,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 55,gemma_7b,32.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 56,yi_6b,30.4,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 57,qwen1.5_4b,23.5,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 58,jetmoe_8b,27.0,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 59,deepseek_7b,21.7,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 60,phi_2,21.9,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 61,deepseekmoe_16b,24.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 62,llama_2_7b,22.1,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 63,gemma_2b,22.6,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 64,olmo_7b,21.2,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 65,mpt_7b,17.4,mixeval_hard-mixed,mixeval_240601,[],holistic | |
| 66,gpt_4o_2024_05_13,87.9,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 67,claude_3_opus,88.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 68,gpt_4_turbo_2024_04_09,88.8,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 69,gemini_1.5_pro_api_0409,84.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 70,yi_large_preview,84.4,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 71,llama_3_70b_instruct,84.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 72,qwen_max_0428,86.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 73,claude_3_sonnet,81.7,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 74,reka_core_20240415,83.3,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 75,mammoth2_8x7b_plus,81.5,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 76,deepseek_v2,83.7,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 77,command_r_plus,81.5,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 78,yi_1.5_34b_chat,81.7,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 79,mistral_large,84.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 80,qwen1.5_72b_chat,84.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 81,mistral_medium,81.9,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 82,gemini_1.0_pro,78.9,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 83,reka_flash_20240226,79.8,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 84,mistral_small,81.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 85,llama_3_8b_instruct,75.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 86,command_r,77.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 87,qwen1.5_32b_chat,81.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 88,gpt_3.5_turbo_0125,79.7,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 89,claude_3_haiku,79.7,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 90,yi_34b_chat,80.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 91,mixtral_8x7b_instruct_v0.1,76.4,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 92,starling_lm_7b_beta,74.8,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 93,yi_1.5_9b_chat,74.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 94,gemma_1.1_7b_it,69.6,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 95,vicuna_33b_v1.3,66.3,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 96,llama_2_70b_chat,74.6,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 97,map_neo_instruct_v0.1,70.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 98,mistral_7b_instruct_v0.2,70.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 99,qwen1.5_7b_chat,71.4,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 100,reka_edge_20240208,68.5,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 101,zephyr_7b_beta,69.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 102,llama_2_7b_chat,61.7,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 103,yi_6b_chat,65.6,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 104,qwen1.5_moe_a2.7b_chat,69.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 105,gemma_1.1_2b_it,51.9,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 106,vicuna_7b_v1.5,60.3,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 107,olmo_7b_instruct,55.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 108,qwen1.5_4b_chat,57.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 109,jetmoe_8b_chat,51.6,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 110,mpt_7b_chat,43.8,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 111,llama_3_70b,82.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 112,qwen1.5_72b,79.5,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 113,yi_34b,78.3,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 114,qwen1.5_32b,77.6,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 115,mixtral_8x7b,74.0,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 116,llama_2_70b,73.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 117,qwen1.5_moe_a2.7b,70.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 118,qwen1.5_7b,68.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 119,llama_3_8b,65.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 120,mistral_7b,64.8,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 121,gemma_7b,64.7,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 122,yi_6b,63.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 123,qwen1.5_4b,58.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 124,jetmoe_8b,57.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 125,deepseek_7b,52.2,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 126,phi_2,51.9,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 127,deepseekmoe_16b,51.4,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 128,llama_2_7b,43.1,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 129,gemma_2b,38.9,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 130,olmo_7b,31.8,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 131,mpt_7b,30.8,mixeval,mixeval_240601,"['mixeval_hard-mixed', 'arena_elo-mixed', 'triviaqa-mixed', 'mmlu-mixed', 'drop-mixed', 'hellaswag-mixed', 'commonsenseqa-mixed', 'triviaqa_hard-mixed', 'mmlu_hard-mixed', 'drop_hard-mixed', 'boolq-mixed']",holistic | |
| 132,gpt_4o_2024_05_13,1287.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 133,claude_3_opus,1248.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 134,gpt_4_turbo_2024_04_09,1256.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 135,gemini_1.5_pro_api_0409,1258.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 136,yi_large_preview,1239.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 137,llama_3_70b_instruct,1208.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 138,qwen_max_0428,1184.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 139,claude_3_sonnet,1201.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 143,command_r_plus,1189.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 145,mistral_large,1156.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 146,qwen1.5_72b_chat,1147.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 147,mistral_medium,1148.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 148,gemini_1.0_pro,1131.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 149,reka_flash_20240226,1148.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 151,llama_3_8b_instruct,1153.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 152,command_r,1147.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 153,qwen1.5_32b_chat,1126.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 154,gpt_3.5_turbo_0125,1102.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 155,claude_3_haiku,1178.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 156,yi_34b_chat,1111.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 157,mixtral_8x7b_instruct_v0.1,1114.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 158,starling_lm_7b_beta,1119.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 160,gemma_1.1_7b_it,1084.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 161,vicuna_33b_v1.3,1090.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 162,llama_2_70b_chat,1093.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 164,mistral_7b_instruct_v0.2,1072.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 165,qwen1.5_7b_chat,1069.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 168,llama_2_7b_chat,1037.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 171,gemma_1.1_2b_it,1019.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 172,vicuna_7b_v1.5,1004.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 173,olmo_7b_instruct,1015.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 174,qwen1.5_4b_chat,988.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 176,mpt_7b_chat,927.0,arena_elo-mixed,mixeval_240601,[],holistic | |
| 198,gpt_4o_2024_05_13,88.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 199,claude_3_opus,90.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 200,gpt_4_turbo_2024_04_09,91.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 201,gemini_1.5_pro_api_0409,85.3,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 202,yi_large_preview,81.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 203,llama_3_70b_instruct,83.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 204,qwen_max_0428,86.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 205,claude_3_sonnet,84.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 206,reka_core_20240415,82.8,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 207,mammoth2_8x7b_plus,83.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 208,deepseek_v2,84.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 209,command_r_plus,83.3,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 210,yi_1.5_34b_chat,78.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 211,mistral_large,88.3,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 212,qwen1.5_72b_chat,83.9,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 213,mistral_medium,86.8,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 214,gemini_1.0_pro,81.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 215,reka_flash_20240226,76.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 216,mistral_small,85.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 217,llama_3_8b_instruct,71.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 218,command_r,80.9,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 219,qwen1.5_32b_chat,75.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 220,gpt_3.5_turbo_0125,85.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 221,claude_3_haiku,79.9,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 222,yi_34b_chat,82.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 223,mixtral_8x7b_instruct_v0.1,82.5,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 224,starling_lm_7b_beta,75.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 225,yi_1.5_9b_chat,61.3,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 226,gemma_1.1_7b_it,64.3,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 227,vicuna_33b_v1.3,79.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 228,llama_2_70b_chat,80.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 229,map_neo_instruct_v0.1,62.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 230,mistral_7b_instruct_v0.2,73.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 231,qwen1.5_7b_chat,64.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 232,reka_edge_20240208,60.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 233,zephyr_7b_beta,74.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 234,llama_2_7b_chat,68.8,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 235,yi_6b_chat,66.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 236,qwen1.5_moe_a2.7b_chat,65.9,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 237,gemma_1.1_2b_it,53.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 238,vicuna_7b_v1.5,66.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 239,olmo_7b_instruct,51.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 240,qwen1.5_4b_chat,46.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 241,jetmoe_8b_chat,46.8,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 242,mpt_7b_chat,50.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 243,llama_3_70b,83.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 244,qwen1.5_72b,78.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 245,yi_34b,72.1,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 246,qwen1.5_32b,71.9,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 247,mixtral_8x7b,77.3,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 248,llama_2_70b,78.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 249,qwen1.5_moe_a2.7b,71.3,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 250,qwen1.5_7b,61.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 251,llama_3_8b,65.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 252,mistral_7b,67.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 253,gemma_7b,66.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 254,yi_6b,54.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 255,qwen1.5_4b,47.8,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 256,jetmoe_8b,53.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 257,deepseek_7b,58.7,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 258,phi_2,37.0,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 259,deepseekmoe_16b,64.2,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 260,llama_2_7b,55.5,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 261,gemma_2b,41.5,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 262,olmo_7b,38.4,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 263,mpt_7b,33.5,triviaqa-mixed,mixeval_240601,[],knowledge | |
| 264,gpt_4o_2024_05_13,85.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 265,claude_3_opus,83.2,mmlu-mixed,mixeval_240601,[],knowledge | |
| 266,gpt_4_turbo_2024_04_09,82.8,mmlu-mixed,mixeval_240601,[],knowledge | |
| 267,gemini_1.5_pro_api_0409,79.2,mmlu-mixed,mixeval_240601,[],knowledge | |
| 268,yi_large_preview,80.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 269,llama_3_70b_instruct,80.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 270,qwen_max_0428,80.6,mmlu-mixed,mixeval_240601,[],knowledge | |
| 271,claude_3_sonnet,74.7,mmlu-mixed,mixeval_240601,[],knowledge | |
| 272,reka_core_20240415,79.3,mmlu-mixed,mixeval_240601,[],knowledge | |
| 273,mammoth2_8x7b_plus,74.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 274,deepseek_v2,77.3,mmlu-mixed,mixeval_240601,[],knowledge | |
| 275,command_r_plus,78.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 276,yi_1.5_34b_chat,76.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 277,mistral_large,80.2,mmlu-mixed,mixeval_240601,[],knowledge | |
| 278,qwen1.5_72b_chat,80.1,mmlu-mixed,mixeval_240601,[],knowledge | |
| 279,mistral_medium,76.3,mmlu-mixed,mixeval_240601,[],knowledge | |
| 280,gemini_1.0_pro,74.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 281,reka_flash_20240226,75.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 282,mistral_small,75.2,mmlu-mixed,mixeval_240601,[],knowledge | |
| 283,llama_3_8b_instruct,71.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 284,command_r,75.0,mmlu-mixed,mixeval_240601,[],knowledge | |
| 285,qwen1.5_32b_chat,78.0,mmlu-mixed,mixeval_240601,[],knowledge | |
| 286,gpt_3.5_turbo_0125,74.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 287,claude_3_haiku,76.1,mmlu-mixed,mixeval_240601,[],knowledge | |
| 288,yi_34b_chat,73.6,mmlu-mixed,mixeval_240601,[],knowledge | |
| 289,mixtral_8x7b_instruct_v0.1,72.0,mmlu-mixed,mixeval_240601,[],knowledge | |
| 290,starling_lm_7b_beta,69.0,mmlu-mixed,mixeval_240601,[],knowledge | |
| 291,yi_1.5_9b_chat,72.6,mmlu-mixed,mixeval_240601,[],knowledge | |
| 292,gemma_1.1_7b_it,66.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 293,vicuna_33b_v1.3,59.2,mmlu-mixed,mixeval_240601,[],knowledge | |
| 294,llama_2_70b_chat,69.8,mmlu-mixed,mixeval_240601,[],knowledge | |
| 295,map_neo_instruct_v0.1,66.7,mmlu-mixed,mixeval_240601,[],knowledge | |
| 296,mistral_7b_instruct_v0.2,67.3,mmlu-mixed,mixeval_240601,[],knowledge | |
| 297,qwen1.5_7b_chat,68.7,mmlu-mixed,mixeval_240601,[],knowledge | |
| 298,reka_edge_20240208,63.6,mmlu-mixed,mixeval_240601,[],knowledge | |
| 299,zephyr_7b_beta,64.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 300,llama_2_7b_chat,59.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 301,yi_6b_chat,65.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 302,qwen1.5_moe_a2.7b_chat,69.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 303,gemma_1.1_2b_it,51.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 304,vicuna_7b_v1.5,58.7,mmlu-mixed,mixeval_240601,[],knowledge | |
| 305,olmo_7b_instruct,57.1,mmlu-mixed,mixeval_240601,[],knowledge | |
| 306,qwen1.5_4b_chat,61.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 307,jetmoe_8b_chat,58.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 308,mpt_7b_chat,37.8,mmlu-mixed,mixeval_240601,[],knowledge | |
| 309,llama_3_70b,79.8,mmlu-mixed,mixeval_240601,[],knowledge | |
| 310,qwen1.5_72b,78.8,mmlu-mixed,mixeval_240601,[],knowledge | |
| 311,yi_34b,79.3,mmlu-mixed,mixeval_240601,[],knowledge | |
| 312,qwen1.5_32b,77.2,mmlu-mixed,mixeval_240601,[],knowledge | |
| 313,mixtral_8x7b,71.6,mmlu-mixed,mixeval_240601,[],knowledge | |
| 314,llama_2_70b,70.8,mmlu-mixed,mixeval_240601,[],knowledge | |
| 315,qwen1.5_moe_a2.7b,69.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 316,qwen1.5_7b,67.0,mmlu-mixed,mixeval_240601,[],knowledge | |
| 317,llama_3_8b,69.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 318,mistral_7b,68.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 319,gemma_7b,67.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 320,yi_6b,71.2,mmlu-mixed,mixeval_240601,[],knowledge | |
| 321,qwen1.5_4b,59.6,mmlu-mixed,mixeval_240601,[],knowledge | |
| 322,jetmoe_8b,55.3,mmlu-mixed,mixeval_240601,[],knowledge | |
| 323,deepseek_7b,53.3,mmlu-mixed,mixeval_240601,[],knowledge | |
| 324,phi_2,62.5,mmlu-mixed,mixeval_240601,[],knowledge | |
| 325,deepseekmoe_16b,49.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 326,llama_2_7b,40.8,mmlu-mixed,mixeval_240601,[],knowledge | |
| 327,gemma_2b,37.4,mmlu-mixed,mixeval_240601,[],knowledge | |
| 328,olmo_7b,29.7,mmlu-mixed,mixeval_240601,[],knowledge | |
| 329,mpt_7b,30.9,mmlu-mixed,mixeval_240601,[],knowledge | |
| 330,gpt_4o_2024_05_13,87.9,drop-mixed,mixeval_240601,[],reasoning | |
| 331,claude_3_opus,91.5,drop-mixed,mixeval_240601,[],reasoning | |
| 332,gpt_4_turbo_2024_04_09,91.0,drop-mixed,mixeval_240601,[],reasoning | |
| 333,gemini_1.5_pro_api_0409,84.2,drop-mixed,mixeval_240601,[],reasoning | |
| 334,yi_large_preview,87.0,drop-mixed,mixeval_240601,[],reasoning | |
| 335,llama_3_70b_instruct,90.1,drop-mixed,mixeval_240601,[],reasoning | |
| 336,qwen_max_0428,85.4,drop-mixed,mixeval_240601,[],reasoning | |
| 337,claude_3_sonnet,87.7,drop-mixed,mixeval_240601,[],reasoning | |
| 338,reka_core_20240415,88.1,drop-mixed,mixeval_240601,[],reasoning | |
| 339,mammoth2_8x7b_plus,85.7,drop-mixed,mixeval_240601,[],reasoning | |
| 340,deepseek_v2,85.3,drop-mixed,mixeval_240601,[],reasoning | |
| 341,command_r_plus,80.4,drop-mixed,mixeval_240601,[],reasoning | |
| 342,yi_1.5_34b_chat,87.0,drop-mixed,mixeval_240601,[],reasoning | |
| 343,mistral_large,88.6,drop-mixed,mixeval_240601,[],reasoning | |
| 344,qwen1.5_72b_chat,85.1,drop-mixed,mixeval_240601,[],reasoning | |
| 345,mistral_medium,83.2,drop-mixed,mixeval_240601,[],reasoning | |
| 346,gemini_1.0_pro,82.6,drop-mixed,mixeval_240601,[],reasoning | |
| 347,reka_flash_20240226,86.7,drop-mixed,mixeval_240601,[],reasoning | |
| 348,mistral_small,86.1,drop-mixed,mixeval_240601,[],reasoning | |
| 349,llama_3_8b_instruct,86.4,drop-mixed,mixeval_240601,[],reasoning | |
| 350,command_r,72.0,drop-mixed,mixeval_240601,[],reasoning | |
| 351,qwen1.5_32b_chat,82.9,drop-mixed,mixeval_240601,[],reasoning | |
| 352,gpt_3.5_turbo_0125,84.8,drop-mixed,mixeval_240601,[],reasoning | |
| 353,claude_3_haiku,85.0,drop-mixed,mixeval_240601,[],reasoning | |
| 354,yi_34b_chat,86.1,drop-mixed,mixeval_240601,[],reasoning | |
| 355,mixtral_8x7b_instruct_v0.1,79.5,drop-mixed,mixeval_240601,[],reasoning | |
| 356,starling_lm_7b_beta,86.4,drop-mixed,mixeval_240601,[],reasoning | |
| 357,yi_1.5_9b_chat,83.9,drop-mixed,mixeval_240601,[],reasoning | |
| 358,gemma_1.1_7b_it,80.6,drop-mixed,mixeval_240601,[],reasoning | |
| 359,vicuna_33b_v1.3,71.4,drop-mixed,mixeval_240601,[],reasoning | |
| 360,llama_2_70b_chat,79.8,drop-mixed,mixeval_240601,[],reasoning | |
| 361,map_neo_instruct_v0.1,75.5,drop-mixed,mixeval_240601,[],reasoning | |
| 362,mistral_7b_instruct_v0.2,72.8,drop-mixed,mixeval_240601,[],reasoning | |
| 363,qwen1.5_7b_chat,76.4,drop-mixed,mixeval_240601,[],reasoning | |
| 364,reka_edge_20240208,80.0,drop-mixed,mixeval_240601,[],reasoning | |
| 365,zephyr_7b_beta,77.3,drop-mixed,mixeval_240601,[],reasoning | |
| 366,llama_2_7b_chat,69.3,drop-mixed,mixeval_240601,[],reasoning | |
| 367,yi_6b_chat,70.5,drop-mixed,mixeval_240601,[],reasoning | |
| 368,qwen1.5_moe_a2.7b_chat,64.6,drop-mixed,mixeval_240601,[],reasoning | |
| 369,gemma_1.1_2b_it,59.8,drop-mixed,mixeval_240601,[],reasoning | |
| 370,vicuna_7b_v1.5,68.3,drop-mixed,mixeval_240601,[],reasoning | |
| 371,olmo_7b_instruct,53.1,drop-mixed,mixeval_240601,[],reasoning | |
| 372,qwen1.5_4b_chat,57.2,drop-mixed,mixeval_240601,[],reasoning | |
| 373,jetmoe_8b_chat,27.0,drop-mixed,mixeval_240601,[],reasoning | |
| 374,mpt_7b_chat,50.0,drop-mixed,mixeval_240601,[],reasoning | |
| 375,llama_3_70b,81.5,drop-mixed,mixeval_240601,[],reasoning | |
| 376,qwen1.5_72b,64.5,drop-mixed,mixeval_240601,[],reasoning | |
| 377,yi_34b,78.2,drop-mixed,mixeval_240601,[],reasoning | |
| 378,qwen1.5_32b,68.7,drop-mixed,mixeval_240601,[],reasoning | |
| 379,mixtral_8x7b,69.8,drop-mixed,mixeval_240601,[],reasoning | |
| 380,llama_2_70b,73.2,drop-mixed,mixeval_240601,[],reasoning | |
| 381,qwen1.5_moe_a2.7b,59.9,drop-mixed,mixeval_240601,[],reasoning | |
| 382,qwen1.5_7b,63.6,drop-mixed,mixeval_240601,[],reasoning | |
| 383,llama_3_8b,63.8,drop-mixed,mixeval_240601,[],reasoning | |
| 384,mistral_7b,61.3,drop-mixed,mixeval_240601,[],reasoning | |
| 385,gemma_7b,63.8,drop-mixed,mixeval_240601,[],reasoning | |
| 386,yi_6b,51.4,drop-mixed,mixeval_240601,[],reasoning | |
| 387,qwen1.5_4b,51.0,drop-mixed,mixeval_240601,[],reasoning | |
| 388,jetmoe_8b,44.1,drop-mixed,mixeval_240601,[],reasoning | |
| 389,deepseek_7b,43.5,drop-mixed,mixeval_240601,[],reasoning | |
| 390,phi_2,50.4,drop-mixed,mixeval_240601,[],reasoning | |
| 391,deepseekmoe_16b,41.1,drop-mixed,mixeval_240601,[],reasoning | |
| 392,llama_2_7b,37.6,drop-mixed,mixeval_240601,[],reasoning | |
| 393,gemma_2b,32.6,drop-mixed,mixeval_240601,[],reasoning | |
| 394,olmo_7b,24.0,drop-mixed,mixeval_240601,[],reasoning | |
| 395,mpt_7b,26.8,drop-mixed,mixeval_240601,[],reasoning | |
| 396,gpt_4o_2024_05_13,94.3,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 397,claude_3_opus,93.3,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 398,gpt_4_turbo_2024_04_09,92.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 399,gemini_1.5_pro_api_0409,89.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 400,yi_large_preview,92.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 401,llama_3_70b_instruct,81.8,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 402,qwen_max_0428,93.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 403,claude_3_sonnet,85.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 404,reka_core_20240415,88.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 405,mammoth2_8x7b_plus,82.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 406,deepseek_v2,88.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 407,command_r_plus,83.5,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 408,yi_1.5_34b_chat,90.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 409,mistral_large,65.0,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 410,qwen1.5_72b_chat,87.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 411,mistral_medium,72.4,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 412,gemini_1.0_pro,74.7,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 413,reka_flash_20240226,90.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 414,mistral_small,73.4,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 415,llama_3_8b_instruct,65.7,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 416,command_r,75.8,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 417,qwen1.5_32b_chat,85.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 418,gpt_3.5_turbo_0125,63.0,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 419,claude_3_haiku,75.8,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 420,yi_34b_chat,86.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 421,mixtral_8x7b_instruct_v0.1,54.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 422,starling_lm_7b_beta,48.5,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 423,yi_1.5_9b_chat,86.5,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 424,gemma_1.1_7b_it,66.3,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 425,vicuna_33b_v1.3,30.3,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 426,llama_2_70b_chat,67.3,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 427,map_neo_instruct_v0.1,74.4,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 428,mistral_7b_instruct_v0.2,54.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 429,qwen1.5_7b_chat,76.1,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 430,reka_edge_20240208,74.7,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 431,zephyr_7b_beta,39.1,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 432,llama_2_7b_chat,35.7,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 433,yi_6b_chat,52.5,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 434,qwen1.5_moe_a2.7b_chat,72.7,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 435,gemma_1.1_2b_it,26.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 436,vicuna_7b_v1.5,24.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 437,olmo_7b_instruct,55.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 438,qwen1.5_4b_chat,54.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 439,jetmoe_8b_chat,86.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 440,mpt_7b_chat,25.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 441,llama_3_70b,90.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 442,qwen1.5_72b,91.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 443,yi_34b,98.0,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 444,qwen1.5_32b,93.3,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 445,mixtral_8x7b,73.7,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 446,llama_2_70b,63.0,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 447,qwen1.5_moe_a2.7b,80.1,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 448,qwen1.5_7b,83.8,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 449,llama_3_8b,51.5,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 450,mistral_7b,54.5,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 451,gemma_7b,36.0,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 452,yi_6b,77.4,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 453,qwen1.5_4b,65.7,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 454,jetmoe_8b,89.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 455,deepseek_7b,35.0,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 456,phi_2,20.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 457,deepseekmoe_16b,28.6,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 458,llama_2_7b,24.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 459,gemma_2b,33.3,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 460,olmo_7b,26.9,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 461,mpt_7b,19.2,hellaswag-mixed,mixeval_240601,[],reasoning | |
| 462,gpt_4o_2024_05_13,86.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 463,claude_3_opus,87.7,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 464,gpt_4_turbo_2024_04_09,85.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 465,gemini_1.5_pro_api_0409,84.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 466,yi_large_preview,90.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 467,llama_3_70b_instruct,83.0,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 468,qwen_max_0428,88.2,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 469,claude_3_sonnet,82.5,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 470,reka_core_20240415,81.6,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 471,mammoth2_8x7b_plus,82.5,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 472,deepseek_v2,84.0,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 473,command_r_plus,82.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 474,yi_1.5_34b_chat,86.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 475,mistral_large,83.5,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 476,qwen1.5_72b_chat,86.3,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 477,mistral_medium,82.5,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 478,gemini_1.0_pro,80.2,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 479,reka_flash_20240226,80.7,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 480,mistral_small,77.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 481,llama_3_8b_instruct,78.3,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 482,command_r,77.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 483,qwen1.5_32b_chat,88.2,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 484,gpt_3.5_turbo_0125,81.6,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 485,claude_3_haiku,78.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 486,yi_34b_chat,78.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 487,mixtral_8x7b_instruct_v0.1,77.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 488,starling_lm_7b_beta,84.9,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 489,yi_1.5_9b_chat,82.5,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 490,gemma_1.1_7b_it,73.6,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 491,vicuna_33b_v1.3,61.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 492,llama_2_70b_chat,74.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 493,map_neo_instruct_v0.1,82.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 494,mistral_7b_instruct_v0.2,66.0,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 495,qwen1.5_7b_chat,82.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 496,reka_edge_20240208,80.7,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 497,zephyr_7b_beta,69.3,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 498,llama_2_7b_chat,61.3,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 499,yi_6b_chat,69.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 500,qwen1.5_moe_a2.7b_chat,81.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 501,gemma_1.1_2b_it,57.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 502,vicuna_7b_v1.5,62.7,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 503,olmo_7b_instruct,64.6,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 504,qwen1.5_4b_chat,74.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 505,jetmoe_8b_chat,68.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 506,mpt_7b_chat,36.3,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 507,llama_3_70b,85.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 508,qwen1.5_72b,87.3,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 509,yi_34b,81.1,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 510,qwen1.5_32b,89.2,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 511,mixtral_8x7b,77.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 512,llama_2_70b,77.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 513,qwen1.5_moe_a2.7b,80.2,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 514,qwen1.5_7b,84.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 515,llama_3_8b,69.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 516,mistral_7b,67.9,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 517,gemma_7b,68.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 518,yi_6b,76.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 519,qwen1.5_4b,79.2,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 520,jetmoe_8b,60.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 521,deepseek_7b,51.4,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 522,phi_2,68.9,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 523,deepseekmoe_16b,48.6,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 524,llama_2_7b,30.7,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 525,gemma_2b,31.6,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 526,olmo_7b,25.5,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 527,mpt_7b,28.8,commonsenseqa-mixed,mixeval_240601,[],reasoning | |
| 528,gpt_4o_2024_05_13,70.3,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 529,claude_3_opus,71.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 530,gpt_4_turbo_2024_04_09,73.1,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 531,gemini_1.5_pro_api_0409,67.8,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 532,yi_large_preview,55.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 533,llama_3_70b_instruct,60.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 534,qwen_max_0428,61.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 535,claude_3_sonnet,59.1,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 536,reka_core_20240415,51.6,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 537,mammoth2_8x7b_plus,52.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 538,deepseek_v2,51.7,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 539,command_r_plus,57.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 540,yi_1.5_34b_chat,44.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 541,mistral_large,55.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 542,qwen1.5_72b_chat,49.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 543,mistral_medium,59.8,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 544,gemini_1.0_pro,58.2,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 545,reka_flash_20240226,42.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 546,mistral_small,56.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 547,llama_3_8b_instruct,40.2,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 548,command_r,57.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 549,qwen1.5_32b_chat,39.1,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 550,gpt_3.5_turbo_0125,46.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 551,claude_3_haiku,42.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 552,yi_34b_chat,41.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 553,mixtral_8x7b_instruct_v0.1,48.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 554,starling_lm_7b_beta,33.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 555,yi_1.5_9b_chat,23.3,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 556,gemma_1.1_7b_it,30.3,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 557,vicuna_33b_v1.3,42.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 558,llama_2_70b_chat,42.2,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 559,map_neo_instruct_v0.1,26.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 560,mistral_7b_instruct_v0.2,33.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 561,qwen1.5_7b_chat,29.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 562,reka_edge_20240208,18.6,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 563,zephyr_7b_beta,30.2,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 564,llama_2_7b_chat,24.8,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 565,yi_6b_chat,18.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 566,qwen1.5_moe_a2.7b_chat,21.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 567,gemma_1.1_2b_it,31.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 568,vicuna_7b_v1.5,25.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 569,olmo_7b_instruct,24.7,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 570,qwen1.5_4b_chat,16.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 571,jetmoe_8b_chat,19.2,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 572,mpt_7b_chat,17.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 573,llama_3_70b,59.1,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 574,qwen1.5_72b,41.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 575,yi_34b,39.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 576,qwen1.5_32b,28.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 577,mixtral_8x7b,44.1,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 578,llama_2_70b,53.8,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 579,qwen1.5_moe_a2.7b,36.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 580,qwen1.5_7b,31.6,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 581,llama_3_8b,22.6,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 582,mistral_7b,24.2,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 583,gemma_7b,31.1,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 584,yi_6b,17.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 585,qwen1.5_4b,14.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 586,jetmoe_8b,22.8,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 587,deepseek_7b,21.4,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 588,phi_2,7.3,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 589,deepseekmoe_16b,24.9,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 590,llama_2_7b,19.5,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 591,gemma_2b,12.1,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 592,olmo_7b,16.0,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 593,mpt_7b,6.6,triviaqa_hard-mixed,mixeval_240601,[],knowledge | |
| 594,gpt_4o_2024_05_13,57.1,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 595,claude_3_opus,55.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 596,gpt_4_turbo_2024_04_09,45.5,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 597,gemini_1.5_pro_api_0409,44.6,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 598,yi_large_preview,48.5,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 599,llama_3_70b_instruct,46.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 600,qwen_max_0428,41.6,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 601,claude_3_sonnet,40.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 602,reka_core_20240415,46.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 603,mammoth2_8x7b_plus,41.1,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 604,deepseek_v2,42.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 605,command_r_plus,42.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 606,yi_1.5_34b_chat,38.1,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 607,mistral_large,42.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 608,qwen1.5_72b_chat,37.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 609,mistral_medium,38.5,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 610,gemini_1.0_pro,35.5,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 611,reka_flash_20240226,34.6,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 612,mistral_small,33.8,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 613,llama_3_8b_instruct,40.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 614,command_r,39.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 615,qwen1.5_32b_chat,29.9,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 616,gpt_3.5_turbo_0125,35.1,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 617,claude_3_haiku,30.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 618,yi_34b_chat,29.9,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 619,mixtral_8x7b_instruct_v0.1,37.2,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 620,starling_lm_7b_beta,34.2,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 621,yi_1.5_9b_chat,36.8,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 622,gemma_1.1_7b_it,39.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 623,vicuna_33b_v1.3,39.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 624,llama_2_70b_chat,27.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 625,map_neo_instruct_v0.1,32.5,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 626,mistral_7b_instruct_v0.2,29.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 627,qwen1.5_7b_chat,29.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 628,reka_edge_20240208,26.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 629,zephyr_7b_beta,24.2,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 630,llama_2_7b_chat,30.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 631,yi_6b_chat,26.8,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 632,qwen1.5_moe_a2.7b_chat,26.8,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 633,gemma_1.1_2b_it,30.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 634,vicuna_7b_v1.5,23.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 635,olmo_7b_instruct,27.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 636,qwen1.5_4b_chat,17.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 637,jetmoe_8b_chat,25.5,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 638,mpt_7b_chat,24.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 639,llama_3_70b,39.8,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 640,qwen1.5_72b,42.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 641,yi_34b,42.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 642,qwen1.5_32b,37.2,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 643,mixtral_8x7b,34.6,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 644,llama_2_70b,29.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 645,qwen1.5_moe_a2.7b,30.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 646,qwen1.5_7b,28.6,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 647,llama_3_8b,38.5,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 648,mistral_7b,27.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 649,gemma_7b,28.1,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 650,yi_6b,37.2,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 651,qwen1.5_4b,22.9,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 652,jetmoe_8b,27.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 653,deepseek_7b,26.4,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 654,phi_2,29.0,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 655,deepseekmoe_16b,30.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 656,llama_2_7b,24.7,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 657,gemma_2b,27.3,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 658,olmo_7b,25.1,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 659,mpt_7b,24.2,mmlu_hard-mixed,mixeval_240601,[],knowledge | |
| 660,gpt_4o_2024_05_13,67.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 661,claude_3_opus,75.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 662,gpt_4_turbo_2024_04_09,71.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 663,gemini_1.5_pro_api_0409,64.8,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 664,yi_large_preview,63.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 665,llama_3_70b_instruct,74.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 666,qwen_max_0428,53.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 667,claude_3_sonnet,66.9,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 668,reka_core_20240415,66.6,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 669,mammoth2_8x7b_plus,65.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 670,deepseek_v2,62.8,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 671,command_r_plus,65.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 672,yi_1.5_34b_chat,67.4,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 673,mistral_large,61.6,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 674,qwen1.5_72b_chat,56.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 675,mistral_medium,47.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 676,gemini_1.0_pro,54.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 677,reka_flash_20240226,65.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 678,mistral_small,52.6,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 679,llama_3_8b_instruct,67.6,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 680,command_r,42.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 681,qwen1.5_32b_chat,54.4,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 682,gpt_3.5_turbo_0125,55.4,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 683,claude_3_haiku,51.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 684,yi_34b_chat,57.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 685,mixtral_8x7b_instruct_v0.1,47.7,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 686,starling_lm_7b_beta,62.9,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 687,yi_1.5_9b_chat,61.3,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 688,gemma_1.1_7b_it,55.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 689,vicuna_33b_v1.3,36.6,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 690,llama_2_70b_chat,42.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 691,map_neo_instruct_v0.1,42.4,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 692,mistral_7b_instruct_v0.2,44.3,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 693,qwen1.5_7b_chat,50.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 694,reka_edge_20240208,56.9,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 695,zephyr_7b_beta,45.3,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 696,llama_2_7b_chat,44.3,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 697,yi_6b_chat,43.7,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 698,qwen1.5_moe_a2.7b_chat,39.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 699,gemma_1.1_2b_it,27.8,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 700,vicuna_7b_v1.5,33.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 701,olmo_7b_instruct,22.9,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 702,qwen1.5_4b_chat,28.6,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 703,jetmoe_8b_chat,11.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 704,mpt_7b_chat,31.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 705,llama_3_70b,59.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 706,qwen1.5_72b,26.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 707,yi_34b,56.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 708,qwen1.5_32b,36.9,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 709,mixtral_8x7b,42.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 710,llama_2_70b,46.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 711,qwen1.5_moe_a2.7b,31.0,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 712,qwen1.5_7b,29.8,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 713,llama_3_8b,37.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 714,mistral_7b,34.5,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 715,gemma_7b,31.4,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 716,yi_6b,19.4,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 717,qwen1.5_4b,24.7,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 718,jetmoe_8b,19.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 719,deepseek_7b,21.4,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 720,phi_2,27.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 721,deepseekmoe_16b,12.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 722,llama_2_7b,14.9,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 723,gemma_2b,13.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 724,olmo_7b,11.1,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 725,mpt_7b,9.2,drop_hard-mixed,mixeval_240601,[],reasoning | |
| 771,llama_3_70b,81.7,boolq-mixed,mixeval_240601,[],knowledge | |
| 772,qwen1.5_72b,86.9,boolq-mixed,mixeval_240601,[],knowledge | |
| 773,yi_34b,79.4,boolq-mixed,mixeval_240601,[],knowledge | |
| 774,qwen1.5_32b,83.4,boolq-mixed,mixeval_240601,[],knowledge | |
| 775,mixtral_8x7b,77.7,boolq-mixed,mixeval_240601,[],knowledge | |
| 776,llama_2_70b,74.3,boolq-mixed,mixeval_240601,[],knowledge | |
| 777,qwen1.5_moe_a2.7b,70.9,boolq-mixed,mixeval_240601,[],knowledge | |
| 778,qwen1.5_7b,77.7,boolq-mixed,mixeval_240601,[],knowledge | |
| 779,llama_3_8b,64.0,boolq-mixed,mixeval_240601,[],knowledge | |
| 780,mistral_7b,68.0,boolq-mixed,mixeval_240601,[],knowledge | |
| 781,gemma_7b,74.3,boolq-mixed,mixeval_240601,[],knowledge | |
| 782,yi_6b,65.1,boolq-mixed,mixeval_240601,[],knowledge | |
| 783,qwen1.5_4b,72.0,boolq-mixed,mixeval_240601,[],knowledge | |
| 784,jetmoe_8b,64.6,boolq-mixed,mixeval_240601,[],knowledge | |
| 785,deepseek_7b,62.9,boolq-mixed,mixeval_240601,[],knowledge | |
| 786,phi_2,73.1,boolq-mixed,mixeval_240601,[],knowledge | |
| 787,deepseekmoe_16b,62.9,boolq-mixed,mixeval_240601,[],knowledge | |
| 788,llama_2_7b,61.7,boolq-mixed,mixeval_240601,[],knowledge | |
| 789,gemma_2b,58.9,boolq-mixed,mixeval_240601,[],knowledge | |
| 790,olmo_7b,49.1,boolq-mixed,mixeval_240601,[],knowledge | |
| 791,mpt_7b,44.0,boolq-mixed,mixeval_240601,[],knowledge | |
| 593,gpt_4_0314,0.57,agieval,BLZ_240312,[],holistic | |
| 594,gpt_4_0613,0.57,agieval,BLZ_240312,[],holistic | |
| 596,claude_1,0.49700000000000005,agieval,BLZ_240312,[],holistic | |
| 601,mixtral_8x7b_instruct_v0.1,0.45299999999999996,agieval,BLZ_240312,[],holistic | |
| 602,yi_34b_chat,0.508,agieval,BLZ_240312,[],holistic | |
| 605,gpt_3.5_turbo_0314,0.43200000000000005,agieval,BLZ_240312,[],holistic | |
| 608,vicuna_33b,0.373,agieval,BLZ_240312,[],holistic | |
| 609,starling_lm_7b_alpha,0.401,agieval,BLZ_240312,[],holistic | |
| 611,llama_2_70b_chat,0.45,agieval,BLZ_240312,[],holistic | |
| 613,openhermes_2.5_mistral_7b,0.43,agieval,BLZ_240312,[],holistic | |
| 614,openchat_3.5,0.42700000000000005,agieval,BLZ_240312,[],holistic | |
| 617,solar_10.7b_instruct_v1.0,0.47600000000000003,agieval,BLZ_240312,[],holistic | |
| 618,dolphin_2.2.1_mistral_7b,0.392,agieval,BLZ_240312,[],holistic | |
| 620,zephyr_7b_beta,0.406,agieval,BLZ_240312,[],holistic | |
| 623,llama_2_13b_chat,0.336,agieval,BLZ_240312,[],holistic | |
| 624,vicuna_13b,0.368,agieval,BLZ_240312,[],holistic | |
| 626,zephyr_7b_alpha,0.38,agieval,BLZ_240312,[],holistic | |
| 627,qwen_14b_chat,0.396,agieval,BLZ_240312,[],holistic | |
| 630,llama_2_7b_chat,0.29600000000000004,agieval,BLZ_240312,[],holistic | |
| 632,mistral_7b_instruct_v0.1,0.335,agieval,BLZ_240312,[],holistic | |
| 634,vicuna_7b,0.314,agieval,BLZ_240312,[],holistic | |
| 636,chatglm3_6b,0.414,agieval,BLZ_240312,[],holistic | |
| 643,chatglm_6b,0.325,agieval,BLZ_240312,[],holistic | |
| 647,llama_13b,0.205,agieval,BLZ_240312,[],holistic | |
| 180,gpt_4_0314,0.963,arc_c,BLZ_240312,[],reasoning | |
| 182,mistral_medium,0.899,arc_c,BLZ_240312,[],reasoning | |
| 188,mixtral_8x7b_instruct_v0.1,0.7021999999999999,arc_c,BLZ_240312,[],reasoning | |
| 189,yi_34b_chat,0.6544,arc_c,BLZ_240312,[],reasoning | |
| 192,gpt_3.5_turbo_0314,0.855,arc_c,BLZ_240312,[],reasoning | |
| 193,wizardlm_70b_v1.0,0.6544,arc_c,BLZ_240312,[],reasoning | |
| 194,tulu_2_dpo_70b,0.721,arc_c,BLZ_240312,[],reasoning | |
| 195,vicuna_33b,0.6212,arc_c,BLZ_240312,[],reasoning | |
| 196,starling_lm_7b_alpha,0.6382,arc_c,BLZ_240312,[],reasoning | |
| 198,llama_2_70b_chat,0.6459,arc_c,BLZ_240312,[],reasoning | |
| 200,openhermes_2.5_mistral_7b,0.6493000000000001,arc_c,BLZ_240312,[],reasoning | |
| 201,openchat_3.5,0.6391,arc_c,BLZ_240312,[],reasoning | |
| 204,solar_10.7b_instruct_v1.0,0.7108,arc_c,BLZ_240312,[],reasoning | |
| 205,dolphin_2.2.1_mistral_7b,0.6331,arc_c,BLZ_240312,[],reasoning | |
| 206,wizardlm_13b_v1.2,0.5904,arc_c,BLZ_240312,[],reasoning | |
| 207,zephyr_7b_beta,0.6203,arc_c,BLZ_240312,[],reasoning | |
| 208,mpt_30b_chat,0.5870000000000001,arc_c,BLZ_240312,[],reasoning | |
| 209,codellama_34b_instruct,0.5427000000000001,arc_c,BLZ_240312,[],reasoning | |
| 210,llama_2_13b_chat,0.5904,arc_c,BLZ_240312,[],reasoning | |
| 211,vicuna_13b,0.5708,arc_c,BLZ_240312,[],reasoning | |
| 213,zephyr_7b_alpha,0.6101,arc_c,BLZ_240312,[],reasoning | |
| 215,falcon_180b_chat,0.6945,arc_c,BLZ_240312,[],reasoning | |
| 217,llama_2_7b_chat,0.529,arc_c,BLZ_240312,[],reasoning | |
| 219,mistral_7b_instruct_v0.1,0.5452,arc_c,BLZ_240312,[],reasoning | |
| 221,vicuna_7b,0.5324,arc_c,BLZ_240312,[],reasoning | |
| 235,yi_34bx2_moe_60b,0.7108,arc_c,BLZ_240312,[],reasoning | |
| 886,gpt_4_1106_preview,0.977,alpacav1,BLZ_240312,[],holistic | |
| 888,gpt_4_0314,0.9528,alpacav1,BLZ_240312,[],holistic | |
| 889,gpt_4_0613,0.9528,alpacav1,BLZ_240312,[],holistic | |
| 890,mistral_medium,0.9682999999999999,alpacav1,BLZ_240312,[],holistic | |
| 891,claude_1,0.8839,alpacav1,BLZ_240312,[],holistic | |
| 892,claude_2.0,0.9136,alpacav1,BLZ_240312,[],holistic | |
| 893,gemini_pro_dev_api,0.7966,alpacav1,BLZ_240312,[],holistic | |
| 894,claude_2.1,0.8708,alpacav1,BLZ_240312,[],holistic | |
| 895,gpt_3.5_turbo_0613,0.8937,alpacav1,BLZ_240312,[],holistic | |
| 896,mixtral_8x7b_instruct_v0.1,0.9478,alpacav1,BLZ_240312,[],holistic | |
| 897,yi_34b_chat,0.9408,alpacav1,BLZ_240312,[],holistic | |
| 898,gemini_pro,0.7966,alpacav1,BLZ_240312,[],holistic | |
| 900,gpt_3.5_turbo_0314,0.8937,alpacav1,BLZ_240312,[],holistic | |
| 902,tulu_2_dpo_70b,0.9503,alpacav1,BLZ_240312,[],holistic | |
| 903,vicuna_33b,0.8898999999999999,alpacav1,BLZ_240312,[],holistic | |
| 904,starling_lm_7b_alpha,0.9198999999999999,alpacav1,BLZ_240312,[],holistic | |
| 906,llama_2_70b_chat,0.9266,alpacav1,BLZ_240312,[],holistic | |
| 909,openchat_3.5,0.8851,alpacav1,BLZ_240312,[],holistic | |
| 911,gpt_3.5_turbo_1106,0.8626,alpacav1,BLZ_240312,[],holistic | |
| 914,wizardlm_13b_v1.2,0.8917,alpacav1,BLZ_240312,[],holistic | |
| 915,zephyr_7b_beta,0.9059999999999999,alpacav1,BLZ_240312,[],holistic | |
| 918,llama_2_13b_chat,0.8109000000000001,alpacav1,BLZ_240312,[],holistic | |
| 921,zephyr_7b_alpha,0.8576,alpacav1,BLZ_240312,[],holistic | |
| 924,guanaco_33b,0.6596,alpacav1,BLZ_240312,[],holistic | |
| 925,llama_2_7b_chat,0.7137,alpacav1,BLZ_240312,[],holistic | |
| 934,chatglm2_6b,0.47130000000000005,alpacav1,BLZ_240312,[],holistic | |
| 937,openassistant_pythia_12b,0.2596,alpacav1,BLZ_240312,[],holistic | |
| 827,gpt_4_1106_preview,0.5,alpacav2,BLZ_240312,[],holistic | |
| 829,gpt_4_0314,0.221,alpacav2,BLZ_240312,[],holistic | |
| 830,gpt_4_0613,0.158,alpacav2,BLZ_240312,[],holistic | |
| 831,mistral_medium,0.21899999999999997,alpacav2,BLZ_240312,[],holistic | |
| 832,claude_1,0.17,alpacav2,BLZ_240312,[],holistic | |
| 833,claude_2.0,0.172,alpacav2,BLZ_240312,[],holistic | |
| 834,gemini_pro_dev_api,0.16899999999999998,alpacav2,BLZ_240312,[],holistic | |
| 835,claude_2.1,0.157,alpacav2,BLZ_240312,[],holistic | |
| 836,gpt_3.5_turbo_0613,0.141,alpacav2,BLZ_240312,[],holistic | |
| 837,mixtral_8x7b_instruct_v0.1,0.183,alpacav2,BLZ_240312,[],holistic | |
| 838,yi_34b_chat,0.297,alpacav2,BLZ_240312,[],holistic | |
| 839,gemini_pro,0.16899999999999998,alpacav2,BLZ_240312,[],holistic | |
| 840,claude_instant_1,0.161,alpacav2,BLZ_240312,[],holistic | |
| 841,gpt_3.5_turbo_0314,0.096,alpacav2,BLZ_240312,[],holistic | |
| 842,wizardlm_70b_v1.0,0.14400000000000002,alpacav2,BLZ_240312,[],holistic | |
| 843,tulu_2_dpo_70b,0.16,alpacav2,BLZ_240312,[],holistic | |
| 844,vicuna_33b,0.127,alpacav2,BLZ_240312,[],holistic | |
| 845,starling_lm_7b_alpha,0.142,alpacav2,BLZ_240312,[],holistic | |
| 846,deepseek_llm_67b_chat,0.121,alpacav2,BLZ_240312,[],holistic | |
| 847,llama_2_70b_chat,0.139,alpacav2,BLZ_240312,[],holistic | |
| 849,openhermes_2.5_mistral_7b,0.10300000000000001,alpacav2,BLZ_240312,[],holistic | |
| 852,gpt_3.5_turbo_1106,0.092,alpacav2,BLZ_240312,[],holistic | |
| 854,dolphin_2.2.1_mistral_7b,0.09,alpacav2,BLZ_240312,[],holistic | |
| 855,wizardlm_13b_v1.2,0.12,alpacav2,BLZ_240312,[],holistic | |
| 856,zephyr_7b_beta,0.11,alpacav2,BLZ_240312,[],holistic | |
| 859,llama_2_13b_chat,0.077,alpacav2,BLZ_240312,[],holistic | |
| 860,vicuna_13b,0.067,alpacav2,BLZ_240312,[],holistic | |
| 862,zephyr_7b_alpha,0.084,alpacav2,BLZ_240312,[],holistic | |
| 863,qwen_14b_chat,0.075,alpacav2,BLZ_240312,[],holistic | |
| 865,guanaco_33b,0.05,alpacav2,BLZ_240312,[],holistic | |
| 866,llama_2_7b_chat,0.0496,alpacav2,BLZ_240312,[],holistic | |
| 870,vicuna_7b,0.048,alpacav2,BLZ_240312,[],holistic | |
| 875,chatglm2_6b,0.027999999999999997,alpacav2,BLZ_240312,[],holistic | |
| 878,openassistant_pythia_12b,0.018000000000000002,alpacav2,BLZ_240312,[],holistic | |
| 1299,gpt_4_1106_preview,0.32799999999999996,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1301,gpt_4_0314,0.21600000000000003,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1302,gpt_4_0613,0.18600000000000003,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1303,mistral_medium,0.196,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1304,claude_1,0.21100000000000002,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1305,claude_2.0,0.21600000000000003,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1306,gemini_pro_dev_api,0.172,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1307,claude_2.1,0.193,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1308,gpt_3.5_turbo_0613,0.14300000000000002,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1309,mixtral_8x7b_instruct_v0.1,0.168,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1310,yi_34b_chat,0.188,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1312,claude_instant_1,0.195,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1313,gpt_3.5_turbo_0314,0.156,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1314,wizardlm_70b_v1.0,0.125,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1315,tulu_2_dpo_70b,0.151,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1316,vicuna_33b,0.115,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1317,starling_lm_7b_alpha,0.10099999999999999,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1318,deepseek_llm_67b_chat,0.141,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1319,llama_2_70b_chat,0.10400000000000001,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1321,openhermes_2.5_mistral_7b,0.126,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1324,gpt_3.5_turbo_1106,0.155,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1326,dolphin_2.2.1_mistral_7b,0.10800000000000001,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1327,wizardlm_13b_v1.2,0.099,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1328,zephyr_7b_beta,0.102,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1331,llama_2_13b_chat,0.068,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1332,vicuna_13b,0.085,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1334,zephyr_7b_alpha,0.086,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1335,qwen_14b_chat,0.1,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1338,llama_2_7b_chat,0.045,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 1342,vicuna_7b,0.06,alpacaeval2_lc,BLZ_240312,[],holistic | |
| 0,gpt_4_0125_preview,1.0,arena_elo,BLZ_240312,[],holistic | |
| 1,gpt_4_1106_preview,0.9992019154030327,arena_elo,BLZ_240312,[],holistic | |
| 2,bard_gemini_pro,0.9768555466879489,arena_elo,BLZ_240312,[],holistic | |
| 3,gpt_4_0314,0.9497206703910615,arena_elo,BLZ_240312,[],holistic | |
| 4,gpt_4_0613,0.9273743016759777,arena_elo,BLZ_240312,[],holistic | |
| 5,mistral_medium,0.9177972865123704,arena_elo,BLZ_240312,[],holistic | |
| 6,claude_1,0.9169992019154031,arena_elo,BLZ_240312,[],holistic | |
| 7,claude_2.0,0.9034317637669593,arena_elo,BLZ_240312,[],holistic | |
| 8,gemini_pro_dev_api,0.8938547486033519,arena_elo,BLZ_240312,[],holistic | |
| 9,claude_2.1,0.8930566640063847,arena_elo,BLZ_240312,[],holistic | |
| 10,gpt_3.5_turbo_0613,0.8922585794094174,arena_elo,BLZ_240312,[],holistic | |
| 11,mixtral_8x7b_instruct_v0.1,0.8922585794094174,arena_elo,BLZ_240312,[],holistic | |
| 12,yi_34b_chat,0.8898643256185156,arena_elo,BLZ_240312,[],holistic | |
| 13,gemini_pro,0.8890662410215483,arena_elo,BLZ_240312,[],holistic | |
| 14,claude_instant_1,0.8850758180367119,arena_elo,BLZ_240312,[],holistic | |
| 15,gpt_3.5_turbo_0314,0.8818834796488427,arena_elo,BLZ_240312,[],holistic | |
| 16,wizardlm_70b_v1.0,0.8818834796488427,arena_elo,BLZ_240312,[],holistic | |
| 17,tulu_2_dpo_70b,0.8810853950518756,arena_elo,BLZ_240312,[],holistic | |
| 18,vicuna_33b,0.8723064644852354,arena_elo,BLZ_240312,[],holistic | |
| 19,starling_lm_7b_alpha,0.8699122106943336,arena_elo,BLZ_240312,[],holistic | |
| 20,deepseek_llm_67b_chat,0.8635275339185954,arena_elo,BLZ_240312,[],holistic | |
| 21,llama_2_70b_chat,0.8635275339185954,arena_elo,BLZ_240312,[],holistic | |
| 22,nv_llama2_70b_steerlm_chat,0.8603351955307262,arena_elo,BLZ_240312,[],holistic | |
| 23,openhermes_2.5_mistral_7b,0.8603351955307262,arena_elo,BLZ_240312,[],holistic | |
| 24,openchat_3.5,0.8587390263367917,arena_elo,BLZ_240312,[],holistic | |
| 25,pplx_70b_online,0.8587390263367917,arena_elo,BLZ_240312,[],holistic | |
| 26,gpt_3.5_turbo_1106,0.8547486033519553,arena_elo,BLZ_240312,[],holistic | |
| 27,solar_10.7b_instruct_v1.0,0.8499600957701516,arena_elo,BLZ_240312,[],holistic | |
| 28,dolphin_2.2.1_mistral_7b,0.8499600957701516,arena_elo,BLZ_240312,[],holistic | |
| 29,wizardlm_13b_v1.2,0.8443735035913806,arena_elo,BLZ_240312,[],holistic | |
| 30,zephyr_7b_beta,0.8387869114126097,arena_elo,BLZ_240312,[],holistic | |
| 31,mpt_30b_chat,0.8332003192338387,arena_elo,BLZ_240312,[],holistic | |
| 32,codellama_34b_instruct,0.8324022346368715,arena_elo,BLZ_240312,[],holistic | |
| 33,llama_2_13b_chat,0.8316041500399042,arena_elo,BLZ_240312,[],holistic | |
| 34,vicuna_13b,0.8300079808459697,arena_elo,BLZ_240312,[],holistic | |
| 35,pplx_7b_online,0.8284118116520351,arena_elo,BLZ_240312,[],holistic | |
| 36,zephyr_7b_alpha,0.8276137270550679,arena_elo,BLZ_240312,[],holistic | |
| 37,qwen_14b_chat,0.825219473264166,arena_elo,BLZ_240312,[],holistic | |
| 38,falcon_180b_chat,0.8236233040702314,arena_elo,BLZ_240312,[],holistic | |
| 39,guanaco_33b,0.8236233040702314,arena_elo,BLZ_240312,[],holistic | |
| 40,llama_2_7b_chat,0.8172386272944933,arena_elo,BLZ_240312,[],holistic | |
| 41,stripedhyena_nous_7b,0.8140462889066241,arena_elo,BLZ_240312,[],holistic | |
| 42,mistral_7b_instruct_v0.1,0.8028731045490822,arena_elo,BLZ_240312,[],holistic | |
| 43,palm_chat_bison_001,0.8028731045490822,arena_elo,BLZ_240312,[],holistic | |
| 44,vicuna_7b,0.8020750199521149,arena_elo,BLZ_240312,[],holistic | |
| 45,koala_13b,0.770949720670391,arena_elo,BLZ_240312,[],holistic | |
| 46,chatglm3_6b,0.7661612130885874,arena_elo,BLZ_240312,[],holistic | |
| 47,gpt4all_13b_snoozy,0.74780526735834,arena_elo,BLZ_240312,[],holistic | |
| 48,mpt_7b_chat,0.7430167597765364,arena_elo,BLZ_240312,[],holistic | |
| 49,chatglm2_6b,0.7422186751795691,arena_elo,BLZ_240312,[],holistic | |
| 50,rwkv_4_raven_14b,0.7382282521947326,arena_elo,BLZ_240312,[],holistic | |
| 51,alpaca_13b,0.7214684756584198,arena_elo,BLZ_240312,[],holistic | |
| 52,openassistant_pythia_12b,0.7158818834796489,arena_elo,BLZ_240312,[],holistic | |
| 53,chatglm_6b,0.704708699122107,arena_elo,BLZ_240312,[],holistic | |
| 54,fastchat_t5_3b,0.6975259377494014,arena_elo,BLZ_240312,[],holistic | |
| 55,stablelm_tuned_alpha_7b,0.6743814844373504,arena_elo,BLZ_240312,[],holistic | |
| 56,dolly_v2_12b,0.6568236233040702,arena_elo,BLZ_240312,[],holistic | |
| 57,llama_13b,0.6384676775738228,arena_elo,BLZ_240312,[],holistic | |
| 709,gpt_4_1106_preview,0.8390000000000001,bbh,BLZ_240312,[],holistic | |
| 711,gpt_4_0314,0.867,bbh,BLZ_240312,[],holistic | |
| 712,gpt_4_0613,0.867,bbh,BLZ_240312,[],holistic | |
| 714,claude_1,0.6729999999999999,bbh,BLZ_240312,[],holistic | |
| 716,gemini_pro_dev_api,0.6559999999999999,bbh,BLZ_240312,[],holistic | |
| 718,gpt_3.5_turbo_0613,0.71,bbh,BLZ_240312,[],holistic | |
| 719,mixtral_8x7b_instruct_v0.1,0.67,bbh,BLZ_240312,[],holistic | |
| 720,yi_34b_chat,0.7170000000000001,bbh,BLZ_240312,[],holistic | |
| 721,gemini_pro,0.6559999999999999,bbh,BLZ_240312,[],holistic | |
| 725,tulu_2_dpo_70b,0.66,bbh,BLZ_240312,[],holistic | |
| 726,vicuna_33b,0.52,bbh,BLZ_240312,[],holistic | |
| 729,llama_2_70b_chat,0.608,bbh,BLZ_240312,[],holistic | |
| 734,gpt_3.5_turbo_1106,0.71,bbh,BLZ_240312,[],holistic | |
| 736,dolphin_2.2.1_mistral_7b,0.598,bbh,BLZ_240312,[],holistic | |
| 741,llama_2_13b_chat,0.5820000000000001,bbh,BLZ_240312,[],holistic | |
| 742,vicuna_13b,0.515,bbh,BLZ_240312,[],holistic | |
| 745,qwen_14b_chat,0.537,bbh,BLZ_240312,[],holistic | |
| 748,llama_2_7b_chat,0.35600000000000004,bbh,BLZ_240312,[],holistic | |
| 750,mistral_7b_instruct_v0.1,0.5670000000000001,bbh,BLZ_240312,[],holistic | |
| 752,vicuna_7b,0.434,bbh,BLZ_240312,[],holistic | |
| 765,llama_13b,0.379,bbh,BLZ_240312,[],holistic | |
| 1122,gpt_4_1106_preview,0.8604999999999999,eq_benchv2,BLZ_240312,[],holistic | |
| 1124,gpt_4_0314,0.8573000000000001,eq_benchv2,BLZ_240312,[],holistic | |
| 1125,gpt_4_0613,0.8479000000000001,eq_benchv2,BLZ_240312,[],holistic | |
| 1126,mistral_medium,0.8256999999999999,eq_benchv2,BLZ_240312,[],holistic | |
| 1127,claude_1,0.7683,eq_benchv2,BLZ_240312,[],holistic | |
| 1128,claude_2.0,0.7289,eq_benchv2,BLZ_240312,[],holistic | |
| 1129,gemini_pro_dev_api,0.7508,eq_benchv2,BLZ_240312,[],holistic | |
| 1130,claude_2.1,0.7395999999999999,eq_benchv2,BLZ_240312,[],holistic | |
| 1131,gpt_3.5_turbo_0613,0.6934999999999999,eq_benchv2,BLZ_240312,[],holistic | |
| 1132,mixtral_8x7b_instruct_v0.1,0.7237,eq_benchv2,BLZ_240312,[],holistic | |
| 1133,yi_34b_chat,0.7162000000000001,eq_benchv2,BLZ_240312,[],holistic | |
| 1135,claude_instant_1,0.6904,eq_benchv2,BLZ_240312,[],holistic | |
| 1136,gpt_3.5_turbo_0314,0.7067,eq_benchv2,BLZ_240312,[],holistic | |
| 1137,wizardlm_70b_v1.0,0.7128,eq_benchv2,BLZ_240312,[],holistic | |
| 1138,tulu_2_dpo_70b,0.7663,eq_benchv2,BLZ_240312,[],holistic | |
| 1139,vicuna_33b,0.6707,eq_benchv2,BLZ_240312,[],holistic | |
| 1140,starling_lm_7b_alpha,0.7390000000000001,eq_benchv2,BLZ_240312,[],holistic | |
| 1141,deepseek_llm_67b_chat,0.7753,eq_benchv2,BLZ_240312,[],holistic | |
| 1142,llama_2_70b_chat,0.7359,eq_benchv2,BLZ_240312,[],holistic | |
| 1144,openhermes_2.5_mistral_7b,0.6689,eq_benchv2,BLZ_240312,[],holistic | |
| 1145,openchat_3.5,0.7218000000000001,eq_benchv2,BLZ_240312,[],holistic | |
| 1146,pplx_70b_online,0.6279,eq_benchv2,BLZ_240312,[],holistic | |
| 1147,gpt_3.5_turbo_1106,0.7173999999999999,eq_benchv2,BLZ_240312,[],holistic | |
| 1148,solar_10.7b_instruct_v1.0,0.7353000000000001,eq_benchv2,BLZ_240312,[],holistic | |
| 1149,dolphin_2.2.1_mistral_7b,0.6992,eq_benchv2,BLZ_240312,[],holistic | |
| 1150,wizardlm_13b_v1.2,0.6371,eq_benchv2,BLZ_240312,[],holistic | |
| 1151,zephyr_7b_beta,0.5832999999999999,eq_benchv2,BLZ_240312,[],holistic | |
| 1153,codellama_34b_instruct,0.4915,eq_benchv2,BLZ_240312,[],holistic | |
| 1154,llama_2_13b_chat,0.49119999999999997,eq_benchv2,BLZ_240312,[],holistic | |
| 1155,vicuna_13b,0.6739,eq_benchv2,BLZ_240312,[],holistic | |
| 1156,pplx_7b_online,0.4891,eq_benchv2,BLZ_240312,[],holistic | |
| 1157,zephyr_7b_alpha,0.5682,eq_benchv2,BLZ_240312,[],holistic | |
| 1158,qwen_14b_chat,0.6347,eq_benchv2,BLZ_240312,[],holistic | |
| 1159,falcon_180b_chat,0.5682,eq_benchv2,BLZ_240312,[],holistic | |
| 1160,guanaco_33b,0.3611,eq_benchv2,BLZ_240312,[],holistic | |
| 1161,llama_2_7b_chat,0.3632,eq_benchv2,BLZ_240312,[],holistic | |
| 1162,stripedhyena_nous_7b,0.5458,eq_benchv2,BLZ_240312,[],holistic | |
| 1163,mistral_7b_instruct_v0.1,0.5215,eq_benchv2,BLZ_240312,[],holistic | |
| 1179,yi_34bx2_moe_60b,0.7269,eq_benchv2,BLZ_240312,[],holistic | |
| 542,mixtral_8x7b_instruct_v0.1,0.7641,gpt4all,BLZ_240312,[],holistic | |
| 543,yi_34b_chat,0.7212999999999999,gpt4all,BLZ_240312,[],holistic | |
| 550,starling_lm_7b_alpha,0.7272,gpt4all,BLZ_240312,[],holistic | |
| 554,openhermes_2.5_mistral_7b,0.7312000000000001,gpt4all,BLZ_240312,[],holistic | |
| 555,openchat_3.5,0.7292000000000001,gpt4all,BLZ_240312,[],holistic | |
| 558,solar_10.7b_instruct_v1.0,0.7511,gpt4all,BLZ_240312,[],holistic | |
| 559,dolphin_2.2.1_mistral_7b,0.7223999999999999,gpt4all,BLZ_240312,[],holistic | |
| 561,zephyr_7b_beta,0.7182999999999999,gpt4all,BLZ_240312,[],holistic | |
| 565,vicuna_13b,0.631,gpt4all,BLZ_240312,[],holistic | |
| 567,zephyr_7b_alpha,0.7223999999999999,gpt4all,BLZ_240312,[],holistic | |
| 573,mistral_7b_instruct_v0.1,0.6795,gpt4all,BLZ_240312,[],holistic | |
| 575,vicuna_7b,0.61,gpt4all,BLZ_240312,[],holistic | |
| 576,koala_13b,0.62,gpt4all,BLZ_240312,[],holistic | |
| 578,gpt4all_13b_snoozy,0.653,gpt4all,BLZ_240312,[],holistic | |
| 579,mpt_7b_chat,0.648,gpt4all,BLZ_240312,[],holistic | |
| 583,openassistant_pythia_12b,0.61,gpt4all,BLZ_240312,[],holistic | |
| 585,fastchat_t5_3b,0.537,gpt4all,BLZ_240312,[],holistic | |
| 586,stablelm_tuned_alpha_7b,0.513,gpt4all,BLZ_240312,[],holistic | |
| 588,llama_13b,0.63,gpt4all,BLZ_240312,[],holistic | |
| 477,mistral_medium,0.667,gsm8k,BLZ_240312,[],math | |
| 483,mixtral_8x7b_instruct_v0.1,0.6073,gsm8k,BLZ_240312,[],math | |
| 484,yi_34b_chat,0.31920000000000004,gsm8k,BLZ_240312,[],math | |
| 487,gpt_3.5_turbo_0314,0.5710000000000001,gsm8k,BLZ_240312,[],math | |
| 488,wizardlm_70b_v1.0,0.1797,gsm8k,BLZ_240312,[],math | |
| 489,tulu_2_dpo_70b,0.6262,gsm8k,BLZ_240312,[],math | |
| 490,vicuna_33b,0.13720000000000002,gsm8k,BLZ_240312,[],math | |
| 491,starling_lm_7b_alpha,0.624,gsm8k,BLZ_240312,[],math | |
| 493,llama_2_70b_chat,0.2669,gsm8k,BLZ_240312,[],math | |
| 495,openhermes_2.5_mistral_7b,0.2608,gsm8k,BLZ_240312,[],math | |
| 496,openchat_3.5,0.26839999999999997,gsm8k,BLZ_240312,[],math | |
| 499,solar_10.7b_instruct_v1.0,0.6475,gsm8k,BLZ_240312,[],math | |
| 500,dolphin_2.2.1_mistral_7b,0.4807,gsm8k,BLZ_240312,[],math | |
| 501,wizardlm_13b_v1.2,0.135,gsm8k,BLZ_240312,[],math | |
| 502,zephyr_7b_beta,0.2904,gsm8k,BLZ_240312,[],math | |
| 503,mpt_30b_chat,0.1213,gsm8k,BLZ_240312,[],math | |
| 504,codellama_34b_instruct,0.37979999999999997,gsm8k,BLZ_240312,[],math | |
| 505,llama_2_13b_chat,0.1524,gsm8k,BLZ_240312,[],math | |
| 506,vicuna_13b,0.113,gsm8k,BLZ_240312,[],math | |
| 508,zephyr_7b_alpha,0.14029999999999998,gsm8k,BLZ_240312,[],math | |
| 509,qwen_14b_chat,0.597,gsm8k,BLZ_240312,[],math | |
| 510,falcon_180b_chat,0.4594,gsm8k,BLZ_240312,[],math | |
| 512,llama_2_7b_chat,0.0735,gsm8k,BLZ_240312,[],math | |
| 514,mistral_7b_instruct_v0.1,0.1425,gsm8k,BLZ_240312,[],math | |
| 516,vicuna_7b,0.0819,gsm8k,BLZ_240312,[],math | |
| 530,yi_34bx2_moe_60b,0.7551000000000001,gsm8k,BLZ_240312,[],math | |
| 239,gpt_4_0314,0.953,hellaswag,BLZ_240312,[],reasoning | |
| 241,mistral_medium,0.88,hellaswag,BLZ_240312,[],reasoning | |
| 247,mixtral_8x7b_instruct_v0.1,0.8763,hellaswag,BLZ_240312,[],reasoning | |
| 248,yi_34b_chat,0.8416,hellaswag,BLZ_240312,[],reasoning | |
| 251,gpt_3.5_turbo_0314,0.706,hellaswag,BLZ_240312,[],reasoning | |
| 252,wizardlm_70b_v1.0,0.8441,hellaswag,BLZ_240312,[],reasoning | |
| 253,tulu_2_dpo_70b,0.8898999999999999,hellaswag,BLZ_240312,[],reasoning | |
| 254,vicuna_33b,0.83,hellaswag,BLZ_240312,[],reasoning | |
| 255,starling_lm_7b_alpha,0.8490000000000001,hellaswag,BLZ_240312,[],reasoning | |
| 257,llama_2_70b_chat,0.8588,hellaswag,BLZ_240312,[],reasoning | |
| 259,openhermes_2.5_mistral_7b,0.8418000000000001,hellaswag,BLZ_240312,[],reasoning | |
| 260,openchat_3.5,0.8479000000000001,hellaswag,BLZ_240312,[],reasoning | |
| 263,solar_10.7b_instruct_v1.0,0.8815999999999999,hellaswag,BLZ_240312,[],reasoning | |
| 264,dolphin_2.2.1_mistral_7b,0.8376,hellaswag,BLZ_240312,[],reasoning | |
| 265,wizardlm_13b_v1.2,0.8220999999999999,hellaswag,BLZ_240312,[],reasoning | |
| 266,zephyr_7b_beta,0.8436,hellaswag,BLZ_240312,[],reasoning | |
| 267,mpt_30b_chat,0.8254,hellaswag,BLZ_240312,[],reasoning | |
| 268,codellama_34b_instruct,0.7692,hellaswag,BLZ_240312,[],reasoning | |
| 269,llama_2_13b_chat,0.8194,hellaswag,BLZ_240312,[],reasoning | |
| 270,vicuna_13b,0.8123999999999999,hellaswag,BLZ_240312,[],reasoning | |
| 272,zephyr_7b_alpha,0.8404,hellaswag,BLZ_240312,[],reasoning | |
| 274,falcon_180b_chat,0.8886,hellaswag,BLZ_240312,[],reasoning | |
| 276,llama_2_7b_chat,0.7855,hellaswag,BLZ_240312,[],reasoning | |
| 278,mistral_7b_instruct_v0.1,0.7563,hellaswag,BLZ_240312,[],reasoning | |
| 280,vicuna_7b,0.7739,hellaswag,BLZ_240312,[],reasoning | |
| 294,yi_34bx2_moe_60b,0.8523000000000001,hellaswag,BLZ_240312,[],reasoning | |
| 129,mixtral_8x7b_instruct_v0.1,0.7262000000000001,hugging_6,BLZ_240312,[],holistic | |
| 130,yi_34b_chat,0.6531999999999999,hugging_6,BLZ_240312,[],holistic | |
| 134,wizardlm_70b_v1.0,0.6125,hugging_6,BLZ_240312,[],holistic | |
| 135,tulu_2_dpo_70b,0.7376999999999999,hugging_6,BLZ_240312,[],holistic | |
| 136,vicuna_33b,0.585,hugging_6,BLZ_240312,[],holistic | |
| 137,starling_lm_7b_alpha,0.6713,hugging_6,BLZ_240312,[],holistic | |
| 139,llama_2_70b_chat,0.624,hugging_6,BLZ_240312,[],holistic | |
| 141,openhermes_2.5_mistral_7b,0.6152000000000001,hugging_6,BLZ_240312,[],holistic | |
| 142,openchat_3.5,0.6124,hugging_6,BLZ_240312,[],holistic | |
| 145,solar_10.7b_instruct_v1.0,0.742,hugging_6,BLZ_240312,[],holistic | |
| 146,dolphin_2.2.1_mistral_7b,0.6493000000000001,hugging_6,BLZ_240312,[],holistic | |
| 147,wizardlm_13b_v1.2,0.5476,hugging_6,BLZ_240312,[],holistic | |
| 148,zephyr_7b_beta,0.6195,hugging_6,BLZ_240312,[],holistic | |
| 149,mpt_30b_chat,0.5538000000000001,hugging_6,BLZ_240312,[],holistic | |
| 150,codellama_34b_instruct,0.5729,hugging_6,BLZ_240312,[],holistic | |
| 151,llama_2_13b_chat,0.5490999999999999,hugging_6,BLZ_240312,[],holistic | |
| 152,vicuna_13b,0.5539999999999999,hugging_6,BLZ_240312,[],holistic | |
| 154,zephyr_7b_alpha,0.595,hugging_6,BLZ_240312,[],holistic | |
| 156,falcon_180b_chat,0.6785,hugging_6,BLZ_240312,[],holistic | |
| 158,llama_2_7b_chat,0.5074000000000001,hugging_6,BLZ_240312,[],holistic | |
| 160,mistral_7b_instruct_v0.1,0.5496,hugging_6,BLZ_240312,[],holistic | |
| 162,vicuna_7b,0.521,hugging_6,BLZ_240312,[],holistic | |
| 176,yi_34bx2_moe_60b,0.7672,hugging_6,BLZ_240312,[],holistic | |
| 768,gpt_4_1106_preview,0.8540000000000001,humaneval,BLZ_240312,[],code | |
| 770,gpt_4_0314,0.884,humaneval,BLZ_240312,[],code | |
| 771,gpt_4_0613,0.884,humaneval,BLZ_240312,[],code | |
| 773,claude_1,0.56,humaneval,BLZ_240312,[],code | |
| 774,claude_2.0,0.7120000000000001,humaneval,BLZ_240312,[],code | |
| 775,gemini_pro_dev_api,0.634,humaneval,BLZ_240312,[],code | |
| 777,gpt_3.5_turbo_0613,0.726,humaneval,BLZ_240312,[],code | |
| 778,mixtral_8x7b_instruct_v0.1,0.5489999999999999,humaneval,BLZ_240312,[],code | |
| 780,gemini_pro,0.634,humaneval,BLZ_240312,[],code | |
| 781,claude_instant_1,0.528,humaneval,BLZ_240312,[],code | |
| 782,gpt_3.5_turbo_0314,0.732,humaneval,BLZ_240312,[],code | |
| 790,openhermes_2.5_mistral_7b,0.48200000000000004,humaneval,BLZ_240312,[],code | |
| 791,openchat_3.5,0.555,humaneval,BLZ_240312,[],code | |
| 793,gpt_3.5_turbo_1106,0.726,humaneval,BLZ_240312,[],code | |
| 797,zephyr_7b_beta,0.3,humaneval,BLZ_240312,[],code | |
| 799,codellama_34b_instruct,0.518,humaneval,BLZ_240312,[],code | |
| 801,vicuna_13b,0.171,humaneval,BLZ_240312,[],code | |
| 804,qwen_14b_chat,0.439,humaneval,BLZ_240312,[],code | |
| 809,mistral_7b_instruct_v0.1,0.287,humaneval,BLZ_240312,[],code | |
| 811,vicuna_7b,0.11599999999999999,humaneval,BLZ_240312,[],code | |
| 947,gpt_4_0314,0.93,llmonitor,BLZ_240312,[],holistic | |
| 948,gpt_4_0613,0.89,llmonitor,BLZ_240312,[],holistic | |
| 950,claude_1,0.66,llmonitor,BLZ_240312,[],holistic | |
| 951,claude_2.0,0.68,llmonitor,BLZ_240312,[],holistic | |
| 954,gpt_3.5_turbo_0613,0.81,llmonitor,BLZ_240312,[],holistic | |
| 958,claude_instant_1,0.6,llmonitor,BLZ_240312,[],holistic | |
| 959,gpt_3.5_turbo_0314,0.79,llmonitor,BLZ_240312,[],holistic | |
| 965,llama_2_70b_chat,0.6,llmonitor,BLZ_240312,[],holistic | |
| 975,mpt_30b_chat,0.4,llmonitor,BLZ_240312,[],holistic | |
| 976,codellama_34b_instruct,0.34,llmonitor,BLZ_240312,[],holistic | |
| 977,llama_2_13b_chat,0.5,llmonitor,BLZ_240312,[],holistic | |
| 978,vicuna_13b,0.5,llmonitor,BLZ_240312,[],holistic | |
| 982,falcon_180b_chat,0.67,llmonitor,BLZ_240312,[],holistic | |
| 983,guanaco_33b,0.43,llmonitor,BLZ_240312,[],holistic | |
| 984,llama_2_7b_chat,0.5,llmonitor,BLZ_240312,[],holistic | |
| 986,mistral_7b_instruct_v0.1,0.57,llmonitor,BLZ_240312,[],holistic | |
| 987,palm_chat_bison_001,0.57,llmonitor,BLZ_240312,[],holistic | |
| 988,vicuna_7b,0.41,llmonitor,BLZ_240312,[],holistic | |
| 989,koala_13b,0.31,llmonitor,BLZ_240312,[],holistic | |
| 992,mpt_7b_chat,0.43,llmonitor,BLZ_240312,[],holistic | |
| 1000,dolly_v2_12b,0.23,llmonitor,BLZ_240312,[],holistic | |
| 1185,mistral_medium,0.654,magi,BLZ_240312,[],holistic | |
| 1188,gemini_pro_dev_api,0.528,magi,BLZ_240312,[],holistic | |
| 1190,gpt_3.5_turbo_0613,0.455,magi,BLZ_240312,[],holistic | |
| 1191,mixtral_8x7b_instruct_v0.1,0.49560000000000004,magi,BLZ_240312,[],holistic | |
| 1192,yi_34b_chat,0.5821999999999999,magi,BLZ_240312,[],holistic | |
| 1195,gpt_3.5_turbo_0314,0.512,magi,BLZ_240312,[],holistic | |
| 1196,wizardlm_70b_v1.0,0.4476,magi,BLZ_240312,[],holistic | |
| 1197,tulu_2_dpo_70b,0.5212,magi,BLZ_240312,[],holistic | |
| 1198,vicuna_33b,0.3837,magi,BLZ_240312,[],holistic | |
| 1199,starling_lm_7b_alpha,0.4304,magi,BLZ_240312,[],holistic | |
| 1200,deepseek_llm_67b_chat,0.5946,magi,BLZ_240312,[],holistic | |
| 1201,llama_2_70b_chat,0.39899999999999997,magi,BLZ_240312,[],holistic | |
| 1203,openhermes_2.5_mistral_7b,0.4236,magi,BLZ_240312,[],holistic | |
| 1204,openchat_3.5,0.42200000000000004,magi,BLZ_240312,[],holistic | |
| 1206,gpt_3.5_turbo_1106,0.462,magi,BLZ_240312,[],holistic | |
| 1207,solar_10.7b_instruct_v1.0,0.4693,magi,BLZ_240312,[],holistic | |
| 1208,dolphin_2.2.1_mistral_7b,0.3782,magi,BLZ_240312,[],holistic | |
| 1209,wizardlm_13b_v1.2,0.3678,magi,BLZ_240312,[],holistic | |
| 1210,zephyr_7b_beta,0.4042,magi,BLZ_240312,[],holistic | |
| 1213,llama_2_13b_chat,0.37170000000000003,magi,BLZ_240312,[],holistic | |
| 1214,vicuna_13b,0.36560000000000004,magi,BLZ_240312,[],holistic | |
| 1216,zephyr_7b_alpha,0.39899999999999997,magi,BLZ_240312,[],holistic | |
| 1217,qwen_14b_chat,0.4535,magi,BLZ_240312,[],holistic | |
| 1219,guanaco_33b,0.38659999999999994,magi,BLZ_240312,[],holistic | |
| 1220,llama_2_7b_chat,0.35969999999999996,magi,BLZ_240312,[],holistic | |
| 1222,mistral_7b_instruct_v0.1,0.3704,magi,BLZ_240312,[],holistic | |
| 1063,gpt_4_1106_preview,0.83,mbpp,BLZ_240312,[],code | |
| 1067,mistral_medium,0.623,mbpp,BLZ_240312,[],code | |
| 1070,gemini_pro_dev_api,0.7290000000000001,mbpp,BLZ_240312,[],code | |
| 1073,mixtral_8x7b_instruct_v0.1,0.607,mbpp,BLZ_240312,[],code | |
| 1075,gemini_pro,0.7290000000000001,mbpp,BLZ_240312,[],code | |
| 1077,gpt_3.5_turbo_0314,0.816,mbpp,BLZ_240312,[],code | |
| 1089,solar_10.7b_instruct_v1.0,0.429,mbpp,BLZ_240312,[],code | |
| 1092,zephyr_7b_beta,0.41100000000000003,mbpp,BLZ_240312,[],code | |
| 296,gpt_4_1106_preview,0.805,mmlu,BLZ_240312,[],knowledge | |
| 298,gpt_4_0314,0.8640000000000001,mmlu,BLZ_240312,[],knowledge | |
| 300,mistral_medium,0.753,mmlu,BLZ_240312,[],knowledge | |
| 301,claude_1,0.77,mmlu,BLZ_240312,[],knowledge | |
| 302,claude_2.0,0.785,mmlu,BLZ_240312,[],knowledge | |
| 303,gemini_pro_dev_api,0.718,mmlu,BLZ_240312,[],knowledge | |
| 306,mixtral_8x7b_instruct_v0.1,0.706,mmlu,BLZ_240312,[],knowledge | |
| 307,yi_34b_chat,0.735,mmlu,BLZ_240312,[],knowledge | |
| 308,gemini_pro,0.718,mmlu,BLZ_240312,[],knowledge | |
| 309,claude_instant_1,0.7340000000000001,mmlu,BLZ_240312,[],knowledge | |
| 310,gpt_3.5_turbo_0314,0.7,mmlu,BLZ_240312,[],knowledge | |
| 311,wizardlm_70b_v1.0,0.637,mmlu,BLZ_240312,[],knowledge | |
| 312,tulu_2_dpo_70b,0.698,mmlu,BLZ_240312,[],knowledge | |
| 313,vicuna_33b,0.5920000000000001,mmlu,BLZ_240312,[],knowledge | |
| 314,starling_lm_7b_alpha,0.639,mmlu,BLZ_240312,[],knowledge | |
| 315,deepseek_llm_67b_chat,0.713,mmlu,BLZ_240312,[],knowledge | |
| 316,llama_2_70b_chat,0.63,mmlu,BLZ_240312,[],knowledge | |
| 317,nv_llama2_70b_steerlm_chat,0.685,mmlu,BLZ_240312,[],knowledge | |
| 318,openhermes_2.5_mistral_7b,0.638,mmlu,BLZ_240312,[],knowledge | |
| 319,openchat_3.5,0.643,mmlu,BLZ_240312,[],knowledge | |
| 321,gpt_3.5_turbo_1106,0.6779999999999999,mmlu,BLZ_240312,[],knowledge | |
| 322,solar_10.7b_instruct_v1.0,0.662,mmlu,BLZ_240312,[],knowledge | |
| 323,dolphin_2.2.1_mistral_7b,0.632,mmlu,BLZ_240312,[],knowledge | |
| 324,wizardlm_13b_v1.2,0.527,mmlu,BLZ_240312,[],knowledge | |
| 325,zephyr_7b_beta,0.614,mmlu,BLZ_240312,[],knowledge | |
| 326,mpt_30b_chat,0.504,mmlu,BLZ_240312,[],knowledge | |
| 327,codellama_34b_instruct,0.537,mmlu,BLZ_240312,[],knowledge | |
| 328,llama_2_13b_chat,0.536,mmlu,BLZ_240312,[],knowledge | |
| 329,vicuna_13b,0.5579999999999999,mmlu,BLZ_240312,[],knowledge | |
| 331,zephyr_7b_alpha,0.614,mmlu,BLZ_240312,[],knowledge | |
| 332,qwen_14b_chat,0.665,mmlu,BLZ_240312,[],knowledge | |
| 333,falcon_180b_chat,0.68,mmlu,BLZ_240312,[],knowledge | |
| 334,guanaco_33b,0.5760000000000001,mmlu,BLZ_240312,[],knowledge | |
| 335,llama_2_7b_chat,0.45799999999999996,mmlu,BLZ_240312,[],knowledge | |
| 337,mistral_7b_instruct_v0.1,0.5539999999999999,mmlu,BLZ_240312,[],knowledge | |
| 339,vicuna_7b,0.51,mmlu,BLZ_240312,[],knowledge | |
| 340,koala_13b,0.447,mmlu,BLZ_240312,[],knowledge | |
| 342,gpt4all_13b_snoozy,0.43,mmlu,BLZ_240312,[],knowledge | |
| 343,mpt_7b_chat,0.32,mmlu,BLZ_240312,[],knowledge | |
| 344,chatglm2_6b,0.455,mmlu,BLZ_240312,[],knowledge | |
| 345,rwkv_4_raven_14b,0.256,mmlu,BLZ_240312,[],knowledge | |
| 346,alpaca_13b,0.48100000000000004,mmlu,BLZ_240312,[],knowledge | |
| 347,openassistant_pythia_12b,0.27,mmlu,BLZ_240312,[],knowledge | |
| 348,chatglm_6b,0.361,mmlu,BLZ_240312,[],knowledge | |
| 349,fastchat_t5_3b,0.47700000000000004,mmlu,BLZ_240312,[],knowledge | |
| 350,stablelm_tuned_alpha_7b,0.244,mmlu,BLZ_240312,[],knowledge | |
| 351,dolly_v2_12b,0.257,mmlu,BLZ_240312,[],knowledge | |
| 352,llama_13b,0.47,mmlu,BLZ_240312,[],knowledge | |
| 353,yi_34bx2_moe_60b,0.775,mmlu,BLZ_240312,[],knowledge | |
| 59,gpt_4_0125_preview,0.0929,mt_bench,BLZ_240312,[],holistic | |
| 60,gpt_4_1106_preview,0.0932,mt_bench,BLZ_240312,[],holistic | |
| 62,gpt_4_0314,0.08960000000000001,mt_bench,BLZ_240312,[],holistic | |
| 63,gpt_4_0613,0.09179999999999999,mt_bench,BLZ_240312,[],holistic | |
| 64,mistral_medium,0.0861,mt_bench,BLZ_240312,[],holistic | |
| 65,claude_1,0.079,mt_bench,BLZ_240312,[],holistic | |
| 66,claude_2.0,0.0806,mt_bench,BLZ_240312,[],holistic | |
| 67,gemini_pro_dev_api,0.08039999999999999,mt_bench,BLZ_240312,[],holistic | |
| 68,claude_2.1,0.0818,mt_bench,BLZ_240312,[],holistic | |
| 69,gpt_3.5_turbo_0613,0.0839,mt_bench,BLZ_240312,[],holistic | |
| 70,mixtral_8x7b_instruct_v0.1,0.083,mt_bench,BLZ_240312,[],holistic | |
| 71,yi_34b_chat,0.07769999999999999,mt_bench,BLZ_240312,[],holistic | |
| 72,gemini_pro,0.08039999999999999,mt_bench,BLZ_240312,[],holistic | |
| 73,claude_instant_1,0.0785,mt_bench,BLZ_240312,[],holistic | |
| 74,gpt_3.5_turbo_0314,0.0794,mt_bench,BLZ_240312,[],holistic | |
| 75,wizardlm_70b_v1.0,0.0771,mt_bench,BLZ_240312,[],holistic | |
| 76,tulu_2_dpo_70b,0.0789,mt_bench,BLZ_240312,[],holistic | |
| 77,vicuna_33b,0.0712,mt_bench,BLZ_240312,[],holistic | |
| 78,starling_lm_7b_alpha,0.0809,mt_bench,BLZ_240312,[],holistic | |
| 79,deepseek_llm_67b_chat,0.08529999999999999,mt_bench,BLZ_240312,[],holistic | |
| 80,llama_2_70b_chat,0.06860000000000001,mt_bench,BLZ_240312,[],holistic | |
| 81,nv_llama2_70b_steerlm_chat,0.0754,mt_bench,BLZ_240312,[],holistic | |
| 82,openhermes_2.5_mistral_7b,0.07690000000000001,mt_bench,BLZ_240312,[],holistic | |
| 83,openchat_3.5,0.0781,mt_bench,BLZ_240312,[],holistic | |
| 84,pplx_70b_online,0.0588,mt_bench,BLZ_240312,[],holistic | |
| 85,gpt_3.5_turbo_1106,0.0832,mt_bench,BLZ_240312,[],holistic | |
| 86,solar_10.7b_instruct_v1.0,0.0758,mt_bench,BLZ_240312,[],holistic | |
| 88,wizardlm_13b_v1.2,0.07200000000000001,mt_bench,BLZ_240312,[],holistic | |
| 89,zephyr_7b_beta,0.07339999999999999,mt_bench,BLZ_240312,[],holistic | |
| 90,mpt_30b_chat,0.0639,mt_bench,BLZ_240312,[],holistic | |
| 92,llama_2_13b_chat,0.0665,mt_bench,BLZ_240312,[],holistic | |
| 93,vicuna_13b,0.06570000000000001,mt_bench,BLZ_240312,[],holistic | |
| 95,zephyr_7b_alpha,0.0688,mt_bench,BLZ_240312,[],holistic | |
| 96,qwen_14b_chat,0.0696,mt_bench,BLZ_240312,[],holistic | |
| 98,guanaco_33b,0.0653,mt_bench,BLZ_240312,[],holistic | |
| 99,llama_2_7b_chat,0.06269999999999999,mt_bench,BLZ_240312,[],holistic | |
| 101,mistral_7b_instruct_v0.1,0.0684,mt_bench,BLZ_240312,[],holistic | |
| 102,palm_chat_bison_001,0.064,mt_bench,BLZ_240312,[],holistic | |
| 103,vicuna_7b,0.0617,mt_bench,BLZ_240312,[],holistic | |
| 104,koala_13b,0.0535,mt_bench,BLZ_240312,[],holistic | |
| 106,gpt4all_13b_snoozy,0.0541,mt_bench,BLZ_240312,[],holistic | |
| 107,mpt_7b_chat,0.0542,mt_bench,BLZ_240312,[],holistic | |
| 108,chatglm2_6b,0.0496,mt_bench,BLZ_240312,[],holistic | |
| 109,rwkv_4_raven_14b,0.0398,mt_bench,BLZ_240312,[],holistic | |
| 110,alpaca_13b,0.0453,mt_bench,BLZ_240312,[],holistic | |
| 111,openassistant_pythia_12b,0.0432,mt_bench,BLZ_240312,[],holistic | |
| 112,chatglm_6b,0.045,mt_bench,BLZ_240312,[],holistic | |
| 113,fastchat_t5_3b,0.0304,mt_bench,BLZ_240312,[],holistic | |
| 114,stablelm_tuned_alpha_7b,0.0275,mt_bench,BLZ_240312,[],holistic | |
| 115,dolly_v2_12b,0.032799999999999996,mt_bench,BLZ_240312,[],holistic | |
| 116,llama_13b,0.026099999999999998,mt_bench,BLZ_240312,[],holistic | |
| 357,gpt_4_0314,0.59,truthfulqa,BLZ_240312,[],knowledge | |
| 365,mixtral_8x7b_instruct_v0.1,0.6457999999999999,truthfulqa,BLZ_240312,[],knowledge | |
| 366,yi_34b_chat,0.5537,truthfulqa,BLZ_240312,[],knowledge | |
| 370,wizardlm_70b_v1.0,0.5481,truthfulqa,BLZ_240312,[],knowledge | |
| 371,tulu_2_dpo_70b,0.6578,truthfulqa,BLZ_240312,[],knowledge | |
| 372,vicuna_33b,0.5616,truthfulqa,BLZ_240312,[],knowledge | |
| 373,starling_lm_7b_alpha,0.4639,truthfulqa,BLZ_240312,[],knowledge | |
| 375,llama_2_70b_chat,0.528,truthfulqa,BLZ_240312,[],knowledge | |
| 377,openhermes_2.5_mistral_7b,0.5224,truthfulqa,BLZ_240312,[],knowledge | |
| 378,openchat_3.5,0.46380000000000005,truthfulqa,BLZ_240312,[],knowledge | |
| 381,solar_10.7b_instruct_v1.0,0.7143,truthfulqa,BLZ_240312,[],knowledge | |
| 382,dolphin_2.2.1_mistral_7b,0.5311,truthfulqa,BLZ_240312,[],knowledge | |
| 383,wizardlm_13b_v1.2,0.4727,truthfulqa,BLZ_240312,[],knowledge | |
| 384,zephyr_7b_beta,0.5745,truthfulqa,BLZ_240312,[],knowledge | |
| 385,mpt_30b_chat,0.5242,truthfulqa,BLZ_240312,[],knowledge | |
| 386,codellama_34b_instruct,0.44439999999999996,truthfulqa,BLZ_240312,[],knowledge | |
| 387,llama_2_13b_chat,0.4412,truthfulqa,BLZ_240312,[],knowledge | |
| 388,vicuna_13b,0.5151,truthfulqa,BLZ_240312,[],knowledge | |
| 390,zephyr_7b_alpha,0.579,truthfulqa,BLZ_240312,[],knowledge | |
| 392,falcon_180b_chat,0.4547,truthfulqa,BLZ_240312,[],knowledge | |
| 394,llama_2_7b_chat,0.4557,truthfulqa,BLZ_240312,[],knowledge | |
| 396,mistral_7b_instruct_v0.1,0.5628,truthfulqa,BLZ_240312,[],knowledge | |
| 398,vicuna_7b,0.5034000000000001,truthfulqa,BLZ_240312,[],knowledge | |
| 412,yi_34bx2_moe_60b,0.6618999999999999,truthfulqa,BLZ_240312,[],knowledge | |
| 418,mistral_medium,0.88,winogrande,BLZ_240312,[],reasoning | |
| 424,mixtral_8x7b_instruct_v0.1,0.8137000000000001,winogrande,BLZ_240312,[],reasoning | |
| 425,yi_34b_chat,0.8011,winogrande,BLZ_240312,[],reasoning | |
| 428,gpt_3.5_turbo_0314,0.852,winogrande,BLZ_240312,[],reasoning | |
| 429,wizardlm_70b_v1.0,0.8081999999999999,winogrande,BLZ_240312,[],reasoning | |
| 430,tulu_2_dpo_70b,0.8327,winogrande,BLZ_240312,[],reasoning | |
| 431,vicuna_33b,0.7703,winogrande,BLZ_240312,[],reasoning | |
| 432,starling_lm_7b_alpha,0.8058,winogrande,BLZ_240312,[],reasoning | |
| 434,llama_2_70b_chat,0.8051,winogrande,BLZ_240312,[],reasoning | |
| 436,openhermes_2.5_mistral_7b,0.7806000000000001,winogrande,BLZ_240312,[],reasoning | |
| 437,openchat_3.5,0.8058,winogrande,BLZ_240312,[],reasoning | |
| 440,solar_10.7b_instruct_v1.0,0.8358,winogrande,BLZ_240312,[],reasoning | |
| 441,dolphin_2.2.1_mistral_7b,0.7814,winogrande,BLZ_240312,[],reasoning | |
| 442,wizardlm_13b_v1.2,0.7190000000000001,winogrande,BLZ_240312,[],reasoning | |
| 443,zephyr_7b_beta,0.7774,winogrande,BLZ_240312,[],reasoning | |
| 444,mpt_30b_chat,0.753,winogrande,BLZ_240312,[],reasoning | |
| 445,codellama_34b_instruct,0.7459,winogrande,BLZ_240312,[],reasoning | |
| 446,llama_2_13b_chat,0.7451000000000001,winogrande,BLZ_240312,[],reasoning | |
| 447,vicuna_13b,0.7465999999999999,winogrande,BLZ_240312,[],reasoning | |
| 449,zephyr_7b_alpha,0.7861,winogrande,BLZ_240312,[],reasoning | |
| 451,falcon_180b_chat,0.8690000000000001,winogrande,BLZ_240312,[],reasoning | |
| 453,llama_2_7b_chat,0.7173999999999999,winogrande,BLZ_240312,[],reasoning | |
| 455,mistral_7b_instruct_v0.1,0.7372,winogrande,BLZ_240312,[],reasoning | |
| 457,vicuna_7b,0.7214,winogrande,BLZ_240312,[],reasoning | |
| 471,yi_34bx2_moe_60b,0.8484999999999999,winogrande,BLZ_240312,[],reasoning | |
| 0,gpt_4_0613,0.957,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 1,llama_3_70b,0.902,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 2,mixtral_8x22b,0.855,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 3,palmyra_x_v3_72b,0.826,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 4,gpt_4_turbo_1106_preview,0.821,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 5,palm_2_unicorn,0.781,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 6,claude_3_opus_20240229,0.762,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 7,qwen1.5_72b,0.757,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 8,palmyra_x_v2_33b,0.736,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 9,yi_34b,0.723,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 10,qwen1.5_32b,0.689,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 11,claude_v1.3,0.689,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 12,mixtral_8x7b_32k_seqlen,0.679,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 13,palm_2_bison,0.655,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 14,claude_2.0,0.651,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 15,deepseek_llm_chat_67b,0.645,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 16,llama_2_70b,0.609,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 17,claude_2.1,0.594,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 18,gpt_3.5_text_davinci_003,0.577,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 19,qwen1.5_14b,0.574,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 20,claude_instant_1.2,0.551,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 21,llama_3_8b,0.519,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 22,gpt_3.5_turbo_0613,0.502,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 23,gemma_7b,0.47,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 24,claude_3_sonnet_20240229,0.468,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 25,gpt_3.5_text_davinci_002,0.468,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 26,llama_65b,0.466,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 27,mistral_large_2402,0.46,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 28,cohere_command,0.421,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 29,dbrx_instructruct,0.419,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 30,mistral_v0.1_7b,0.415,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 31,mistral_small_2402,0.415,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 32,mistral_medium_2312,0.383,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 33,qwen1.5_7b,0.377,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 34,claude_3_haiku_20240307,0.377,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 35,yi_6b,0.351,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 36,llama_2_13b,0.332,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 37,jurassic_2_jumbo_178b,0.317,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 38,falcon_40b,0.306,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 39,phi_2,0.26,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 40,jurassic_2_grande_17b,0.253,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 41,llama_2_7b,0.234,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 42,luminous_supreme_70b,0.213,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 43,cohere_command_light,0.166,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 44,luminous_extended_30b,0.119,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 45,falcon_7b,0.1,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 46,olmo_7b,0.083,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 47,luminous_base_13b,0.072,helm_lite_mwr,helm_lite_240610,"['narrativeqa', 'naturalquestions_open', 'naturalquestions_closed', 'openbookqa', 'mmlu', 'math', 'gsm8k', 'legalbench', 'medqa', 'wmt_2014']",holistic | |
| 48,gpt_4_0613,0.768,narrativeqa,helm_lite_240610,[],knowledge | |
| 49,llama_3_70b,0.798,narrativeqa,helm_lite_240610,[],knowledge | |
| 50,mixtral_8x22b,0.779,narrativeqa,helm_lite_240610,[],knowledge | |
| 51,palmyra_x_v3_72b,0.706,narrativeqa,helm_lite_240610,[],knowledge | |
| 52,gpt_4_turbo_1106_preview,0.727,narrativeqa,helm_lite_240610,[],knowledge | |
| 53,palm_2_unicorn,0.583,narrativeqa,helm_lite_240610,[],knowledge | |
| 54,claude_3_opus_20240229,0.351,narrativeqa,helm_lite_240610,[],knowledge | |
| 55,qwen1.5_72b,0.601,narrativeqa,helm_lite_240610,[],knowledge | |
| 56,palmyra_x_v2_33b,0.752,narrativeqa,helm_lite_240610,[],knowledge | |
| 57,yi_34b,0.782,narrativeqa,helm_lite_240610,[],knowledge | |
| 58,qwen1.5_32b,0.589,narrativeqa,helm_lite_240610,[],knowledge | |
| 59,claude_v1.3,0.723,narrativeqa,helm_lite_240610,[],knowledge | |
| 60,mixtral_8x7b_32k_seqlen,0.767,narrativeqa,helm_lite_240610,[],knowledge | |
| 61,palm_2_bison,0.718,narrativeqa,helm_lite_240610,[],knowledge | |
| 62,claude_2.0,0.718,narrativeqa,helm_lite_240610,[],knowledge | |
| 63,deepseek_llm_chat_67b,0.581,narrativeqa,helm_lite_240610,[],knowledge | |
| 64,llama_2_70b,0.763,narrativeqa,helm_lite_240610,[],knowledge | |
| 65,claude_2.1,0.677,narrativeqa,helm_lite_240610,[],knowledge | |
| 66,gpt_3.5_text_davinci_003,0.731,narrativeqa,helm_lite_240610,[],knowledge | |
| 67,qwen1.5_14b,0.711,narrativeqa,helm_lite_240610,[],knowledge | |
| 68,claude_instant_1.2,0.616,narrativeqa,helm_lite_240610,[],knowledge | |
| 69,llama_3_8b,0.754,narrativeqa,helm_lite_240610,[],knowledge | |
| 70,gpt_3.5_turbo_0613,0.655,narrativeqa,helm_lite_240610,[],knowledge | |
| 71,gemma_7b,0.752,narrativeqa,helm_lite_240610,[],knowledge | |
| 72,claude_3_sonnet_20240229,0.111,narrativeqa,helm_lite_240610,[],knowledge | |
| 73,gpt_3.5_text_davinci_002,0.719,narrativeqa,helm_lite_240610,[],knowledge | |
| 74,llama_65b,0.755,narrativeqa,helm_lite_240610,[],knowledge | |
| 75,mistral_large_2402,0.454,narrativeqa,helm_lite_240610,[],knowledge | |
| 76,cohere_command,0.749,narrativeqa,helm_lite_240610,[],knowledge | |
| 77,dbrx_instructruct,0.488,narrativeqa,helm_lite_240610,[],knowledge | |
| 78,mistral_v0.1_7b,0.716,narrativeqa,helm_lite_240610,[],knowledge | |
| 79,mistral_small_2402,0.519,narrativeqa,helm_lite_240610,[],knowledge | |
| 80,mistral_medium_2312,0.449,narrativeqa,helm_lite_240610,[],knowledge | |
| 81,qwen1.5_7b,0.448,narrativeqa,helm_lite_240610,[],knowledge | |
| 82,claude_3_haiku_20240307,0.244,narrativeqa,helm_lite_240610,[],knowledge | |
| 83,yi_6b,0.702,narrativeqa,helm_lite_240610,[],knowledge | |
| 84,llama_2_13b,0.741,narrativeqa,helm_lite_240610,[],knowledge | |
| 85,jurassic_2_jumbo_178b,0.728,narrativeqa,helm_lite_240610,[],knowledge | |
| 86,falcon_40b,0.671,narrativeqa,helm_lite_240610,[],knowledge | |
| 87,phi_2,0.703,narrativeqa,helm_lite_240610,[],knowledge | |
| 88,jurassic_2_grande_17b,0.744,narrativeqa,helm_lite_240610,[],knowledge | |
| 89,llama_2_7b,0.686,narrativeqa,helm_lite_240610,[],knowledge | |
| 90,luminous_supreme_70b,0.743,narrativeqa,helm_lite_240610,[],knowledge | |
| 91,cohere_command_light,0.629,narrativeqa,helm_lite_240610,[],knowledge | |
| 92,luminous_extended_30b,0.684,narrativeqa,helm_lite_240610,[],knowledge | |
| 93,falcon_7b,0.621,narrativeqa,helm_lite_240610,[],knowledge | |
| 94,olmo_7b,0.597,narrativeqa,helm_lite_240610,[],knowledge | |
| 95,luminous_base_13b,0.633,narrativeqa,helm_lite_240610,[],knowledge | |
| 96,gpt_4_0613,0.79,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 97,llama_3_70b,0.743,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 98,mixtral_8x22b,0.726,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 99,palmyra_x_v3_72b,0.685,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 100,gpt_4_turbo_1106_preview,0.763,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 101,palm_2_unicorn,0.674,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 102,claude_3_opus_20240229,0.264,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 103,qwen1.5_72b,0.758,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 104,palmyra_x_v2_33b,0.752,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 105,yi_34b,0.775,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 106,qwen1.5_32b,0.777,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 107,claude_v1.3,0.699,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 108,mixtral_8x7b_32k_seqlen,0.699,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 109,palm_2_bison,0.813,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 110,claude_2.0,0.67,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 111,deepseek_llm_chat_67b,0.733,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 112,llama_2_70b,0.674,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 113,claude_2.1,0.611,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 114,gpt_3.5_text_davinci_003,0.77,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 115,qwen1.5_14b,0.772,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 116,claude_instant_1.2,0.731,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 117,llama_3_8b,0.681,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 118,gpt_3.5_turbo_0613,0.678,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 119,gemma_7b,0.665,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 120,claude_3_sonnet_20240229,0.072,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 121,gpt_3.5_text_davinci_002,0.71,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 122,llama_65b,0.672,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 123,mistral_large_2402,0.485,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 124,cohere_command,0.777,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 125,dbrx_instructruct,0.55,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 126,mistral_v0.1_7b,0.687,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 127,mistral_small_2402,0.587,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 128,mistral_medium_2312,0.468,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 129,qwen1.5_7b,0.749,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 130,claude_3_haiku_20240307,0.252,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 131,yi_6b,0.748,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 132,llama_2_13b,0.64,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 133,jurassic_2_jumbo_178b,0.65,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 134,falcon_40b,0.676,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 135,phi_2,0.68,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 136,jurassic_2_grande_17b,0.627,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 137,llama_2_7b,0.612,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 138,luminous_supreme_70b,0.656,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 139,cohere_command_light,0.686,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 140,luminous_extended_30b,0.611,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 141,falcon_7b,0.58,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 142,olmo_7b,0.603,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 143,luminous_base_13b,0.577,naturalquestions_open,helm_lite_240610,[],knowledge | |
| 144,gpt_4_0613,0.457,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 145,llama_3_70b,0.475,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 146,mixtral_8x22b,0.478,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 147,palmyra_x_v3_72b,0.407,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 148,gpt_4_turbo_1106_preview,0.435,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 149,palm_2_unicorn,0.435,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 150,claude_3_opus_20240229,0.441,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 151,qwen1.5_72b,0.417,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 152,palmyra_x_v2_33b,0.428,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 153,yi_34b,0.443,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 154,qwen1.5_32b,0.353,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 155,claude_v1.3,0.409,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 156,mixtral_8x7b_32k_seqlen,0.427,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 157,palm_2_bison,0.39,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 158,claude_2.0,0.428,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 159,deepseek_llm_chat_67b,0.412,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 160,llama_2_70b,0.46,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 161,claude_2.1,0.375,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 162,gpt_3.5_text_davinci_003,0.413,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 163,qwen1.5_14b,0.3,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 164,claude_instant_1.2,0.343,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 165,llama_3_8b,0.378,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 166,gpt_3.5_turbo_0613,0.335,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 167,gemma_7b,0.336,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 168,claude_3_sonnet_20240229,0.028,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 169,gpt_3.5_text_davinci_002,0.394,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 170,llama_65b,0.433,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 171,mistral_large_2402,0.311,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 172,cohere_command,0.391,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 173,dbrx_instructruct,0.284,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 174,mistral_v0.1_7b,0.367,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 175,mistral_small_2402,0.304,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 176,mistral_medium_2312,0.29,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 177,qwen1.5_7b,0.27,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 178,claude_3_haiku_20240307,0.144,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 179,yi_6b,0.31,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 180,llama_2_13b,0.371,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 181,jurassic_2_jumbo_178b,0.385,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 182,falcon_40b,0.392,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 183,phi_2,0.155,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 184,jurassic_2_grande_17b,0.35,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 185,llama_2_7b,0.333,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 186,luminous_supreme_70b,0.299,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 187,cohere_command_light,0.195,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 188,luminous_extended_30b,0.253,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 189,falcon_7b,0.285,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 190,olmo_7b,0.259,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 191,luminous_base_13b,0.197,naturalquestions_closed,helm_lite_240610,[],knowledge | |
| 192,gpt_4_0613,0.96,openbookqa,helm_lite_240610,[],knowledge | |
| 193,llama_3_70b,0.934,openbookqa,helm_lite_240610,[],knowledge | |
| 194,mixtral_8x22b,0.882,openbookqa,helm_lite_240610,[],knowledge | |
| 195,palmyra_x_v3_72b,0.938,openbookqa,helm_lite_240610,[],knowledge | |
| 196,gpt_4_turbo_1106_preview,0.95,openbookqa,helm_lite_240610,[],knowledge | |
| 197,palm_2_unicorn,0.938,openbookqa,helm_lite_240610,[],knowledge | |
| 198,claude_3_opus_20240229,0.956,openbookqa,helm_lite_240610,[],knowledge | |
| 199,qwen1.5_72b,0.93,openbookqa,helm_lite_240610,[],knowledge | |
| 200,palmyra_x_v2_33b,0.878,openbookqa,helm_lite_240610,[],knowledge | |
| 201,yi_34b,0.92,openbookqa,helm_lite_240610,[],knowledge | |
| 202,qwen1.5_32b,0.932,openbookqa,helm_lite_240610,[],knowledge | |
| 203,claude_v1.3,0.908,openbookqa,helm_lite_240610,[],knowledge | |
| 204,mixtral_8x7b_32k_seqlen,0.868,openbookqa,helm_lite_240610,[],knowledge | |
| 205,palm_2_bison,0.878,openbookqa,helm_lite_240610,[],knowledge | |
| 206,claude_2.0,0.862,openbookqa,helm_lite_240610,[],knowledge | |
| 207,deepseek_llm_chat_67b,0.88,openbookqa,helm_lite_240610,[],knowledge | |
| 208,llama_2_70b,0.838,openbookqa,helm_lite_240610,[],knowledge | |
| 209,claude_2.1,0.872,openbookqa,helm_lite_240610,[],knowledge | |
| 210,gpt_3.5_text_davinci_003,0.828,openbookqa,helm_lite_240610,[],knowledge | |
| 211,qwen1.5_14b,0.862,openbookqa,helm_lite_240610,[],knowledge | |
| 212,claude_instant_1.2,0.844,openbookqa,helm_lite_240610,[],knowledge | |
| 213,llama_3_8b,0.766,openbookqa,helm_lite_240610,[],knowledge | |
| 214,gpt_3.5_turbo_0613,0.838,openbookqa,helm_lite_240610,[],knowledge | |
| 215,gemma_7b,0.808,openbookqa,helm_lite_240610,[],knowledge | |
| 216,claude_3_sonnet_20240229,0.918,openbookqa,helm_lite_240610,[],knowledge | |
| 217,gpt_3.5_text_davinci_002,0.796,openbookqa,helm_lite_240610,[],knowledge | |
| 218,llama_65b,0.754,openbookqa,helm_lite_240610,[],knowledge | |
| 219,mistral_large_2402,0.894,openbookqa,helm_lite_240610,[],knowledge | |
| 220,cohere_command,0.774,openbookqa,helm_lite_240610,[],knowledge | |
| 221,dbrx_instructruct,0.91,openbookqa,helm_lite_240610,[],knowledge | |
| 222,mistral_v0.1_7b,0.776,openbookqa,helm_lite_240610,[],knowledge | |
| 223,mistral_small_2402,0.862,openbookqa,helm_lite_240610,[],knowledge | |
| 224,mistral_medium_2312,0.83,openbookqa,helm_lite_240610,[],knowledge | |
| 225,qwen1.5_7b,0.806,openbookqa,helm_lite_240610,[],knowledge | |
| 226,claude_3_haiku_20240307,0.838,openbookqa,helm_lite_240610,[],knowledge | |
| 227,yi_6b,0.8,openbookqa,helm_lite_240610,[],knowledge | |
| 228,llama_2_13b,0.634,openbookqa,helm_lite_240610,[],knowledge | |
| 229,jurassic_2_jumbo_178b,0.688,openbookqa,helm_lite_240610,[],knowledge | |
| 230,falcon_40b,0.662,openbookqa,helm_lite_240610,[],knowledge | |
| 231,phi_2,0.798,openbookqa,helm_lite_240610,[],knowledge | |
| 232,jurassic_2_grande_17b,0.614,openbookqa,helm_lite_240610,[],knowledge | |
| 233,llama_2_7b,0.544,openbookqa,helm_lite_240610,[],knowledge | |
| 234,luminous_supreme_70b,0.284,openbookqa,helm_lite_240610,[],knowledge | |
| 235,cohere_command_light,0.398,openbookqa,helm_lite_240610,[],knowledge | |
| 236,luminous_extended_30b,0.272,openbookqa,helm_lite_240610,[],knowledge | |
| 237,falcon_7b,0.26,openbookqa,helm_lite_240610,[],knowledge | |
| 238,olmo_7b,0.222,openbookqa,helm_lite_240610,[],knowledge | |
| 239,luminous_base_13b,0.286,openbookqa,helm_lite_240610,[],knowledge | |
| 240,gpt_4_0613,0.735,mmlu,helm_lite_240610,[],knowledge | |
| 241,llama_3_70b,0.695,mmlu,helm_lite_240610,[],knowledge | |
| 242,mixtral_8x22b,0.701,mmlu,helm_lite_240610,[],knowledge | |
| 243,palmyra_x_v3_72b,0.702,mmlu,helm_lite_240610,[],knowledge | |
| 244,gpt_4_turbo_1106_preview,0.699,mmlu,helm_lite_240610,[],knowledge | |
| 245,palm_2_unicorn,0.702,mmlu,helm_lite_240610,[],knowledge | |
| 246,claude_3_opus_20240229,0.768,mmlu,helm_lite_240610,[],knowledge | |
| 247,qwen1.5_72b,0.647,mmlu,helm_lite_240610,[],knowledge | |
| 248,palmyra_x_v2_33b,0.621,mmlu,helm_lite_240610,[],knowledge | |
| 249,yi_34b,0.65,mmlu,helm_lite_240610,[],knowledge | |
| 250,qwen1.5_32b,0.628,mmlu,helm_lite_240610,[],knowledge | |
| 251,claude_v1.3,0.631,mmlu,helm_lite_240610,[],knowledge | |
| 252,mixtral_8x7b_32k_seqlen,0.649,mmlu,helm_lite_240610,[],knowledge | |
| 253,palm_2_bison,0.608,mmlu,helm_lite_240610,[],knowledge | |
| 254,claude_2.0,0.639,mmlu,helm_lite_240610,[],knowledge | |
| 255,deepseek_llm_chat_67b,0.641,mmlu,helm_lite_240610,[],knowledge | |
| 256,llama_2_70b,0.58,mmlu,helm_lite_240610,[],knowledge | |
| 257,claude_2.1,0.643,mmlu,helm_lite_240610,[],knowledge | |
| 258,gpt_3.5_text_davinci_003,0.555,mmlu,helm_lite_240610,[],knowledge | |
| 259,qwen1.5_14b,0.626,mmlu,helm_lite_240610,[],knowledge | |
| 260,claude_instant_1.2,0.631,mmlu,helm_lite_240610,[],knowledge | |
| 261,llama_3_8b,0.602,mmlu,helm_lite_240610,[],knowledge | |
| 262,gpt_3.5_turbo_0613,0.614,mmlu,helm_lite_240610,[],knowledge | |
| 263,gemma_7b,0.571,mmlu,helm_lite_240610,[],knowledge | |
| 264,claude_3_sonnet_20240229,0.652,mmlu,helm_lite_240610,[],knowledge | |
| 265,gpt_3.5_text_davinci_002,0.568,mmlu,helm_lite_240610,[],knowledge | |
| 266,llama_65b,0.584,mmlu,helm_lite_240610,[],knowledge | |
| 267,mistral_large_2402,0.638,mmlu,helm_lite_240610,[],knowledge | |
| 268,cohere_command,0.525,mmlu,helm_lite_240610,[],knowledge | |
| 269,dbrx_instructruct,0.643,mmlu,helm_lite_240610,[],knowledge | |
| 270,mistral_v0.1_7b,0.584,mmlu,helm_lite_240610,[],knowledge | |
| 271,mistral_small_2402,0.593,mmlu,helm_lite_240610,[],knowledge | |
| 272,mistral_medium_2312,0.618,mmlu,helm_lite_240610,[],knowledge | |
| 273,qwen1.5_7b,0.569,mmlu,helm_lite_240610,[],knowledge | |
| 274,claude_3_haiku_20240307,0.662,mmlu,helm_lite_240610,[],knowledge | |
| 275,yi_6b,0.53,mmlu,helm_lite_240610,[],knowledge | |
| 276,llama_2_13b,0.505,mmlu,helm_lite_240610,[],knowledge | |
| 277,jurassic_2_jumbo_178b,0.483,mmlu,helm_lite_240610,[],knowledge | |
| 278,falcon_40b,0.507,mmlu,helm_lite_240610,[],knowledge | |
| 279,phi_2,0.518,mmlu,helm_lite_240610,[],knowledge | |
| 280,jurassic_2_grande_17b,0.471,mmlu,helm_lite_240610,[],knowledge | |
| 281,llama_2_7b,0.425,mmlu,helm_lite_240610,[],knowledge | |
| 282,luminous_supreme_70b,0.316,mmlu,helm_lite_240610,[],knowledge | |
| 283,cohere_command_light,0.386,mmlu,helm_lite_240610,[],knowledge | |
| 284,luminous_extended_30b,0.248,mmlu,helm_lite_240610,[],knowledge | |
| 285,falcon_7b,0.288,mmlu,helm_lite_240610,[],knowledge | |
| 286,olmo_7b,0.305,mmlu,helm_lite_240610,[],knowledge | |
| 287,luminous_base_13b,0.243,mmlu,helm_lite_240610,[],knowledge | |
| 288,gpt_4_0613,0.802,math,helm_lite_240610,[],math | |
| 289,llama_3_70b,0.663,math,helm_lite_240610,[],math | |
| 290,mixtral_8x22b,0.656,math,helm_lite_240610,[],math | |
| 291,palmyra_x_v3_72b,0.723,math,helm_lite_240610,[],math | |
| 292,gpt_4_turbo_1106_preview,0.857,math,helm_lite_240610,[],math | |
| 293,palm_2_unicorn,0.674,math,helm_lite_240610,[],math | |
| 294,claude_3_opus_20240229,0.76,math,helm_lite_240610,[],math | |
| 295,qwen1.5_72b,0.683,math,helm_lite_240610,[],math | |
| 296,palmyra_x_v2_33b,0.58,math,helm_lite_240610,[],math | |
| 297,yi_34b,0.375,math,helm_lite_240610,[],math | |
| 298,qwen1.5_32b,0.733,math,helm_lite_240610,[],math | |
| 299,claude_v1.3,0.54,math,helm_lite_240610,[],math | |
| 300,mixtral_8x7b_32k_seqlen,0.494,math,helm_lite_240610,[],math | |
| 301,palm_2_bison,0.421,math,helm_lite_240610,[],math | |
| 302,claude_2.0,0.603,math,helm_lite_240610,[],math | |
| 303,deepseek_llm_chat_67b,0.615,math,helm_lite_240610,[],math | |
| 304,llama_2_70b,0.323,math,helm_lite_240610,[],math | |
| 305,claude_2.1,0.632,math,helm_lite_240610,[],math | |
| 306,gpt_3.5_text_davinci_003,0.449,math,helm_lite_240610,[],math | |
| 307,qwen1.5_14b,0.686,math,helm_lite_240610,[],math | |
| 308,claude_instant_1.2,0.499,math,helm_lite_240610,[],math | |
| 309,llama_3_8b,0.391,math,helm_lite_240610,[],math | |
| 310,gpt_3.5_turbo_0613,0.667,math,helm_lite_240610,[],math | |
| 311,gemma_7b,0.5,math,helm_lite_240610,[],math | |
| 312,claude_3_sonnet_20240229,0.084,math,helm_lite_240610,[],math | |
| 313,gpt_3.5_text_davinci_002,0.428,math,helm_lite_240610,[],math | |
| 314,llama_65b,0.257,math,helm_lite_240610,[],math | |
| 315,mistral_large_2402,0.75,math,helm_lite_240610,[],math | |
| 316,cohere_command,0.236,math,helm_lite_240610,[],math | |
| 317,dbrx_instructruct,0.358,math,helm_lite_240610,[],math | |
| 318,mistral_v0.1_7b,0.297,math,helm_lite_240610,[],math | |
| 319,mistral_small_2402,0.621,math,helm_lite_240610,[],math | |
| 320,mistral_medium_2312,0.565,math,helm_lite_240610,[],math | |
| 321,qwen1.5_7b,0.561,math,helm_lite_240610,[],math | |
| 322,claude_3_haiku_20240307,0.131,math,helm_lite_240610,[],math | |
| 323,yi_6b,0.126,math,helm_lite_240610,[],math | |
| 324,llama_2_13b,0.102,math,helm_lite_240610,[],math | |
| 325,jurassic_2_jumbo_178b,0.103,math,helm_lite_240610,[],math | |
| 326,falcon_40b,0.128,math,helm_lite_240610,[],math | |
| 327,phi_2,0.255,math,helm_lite_240610,[],math | |
| 328,jurassic_2_grande_17b,0.064,math,helm_lite_240610,[],math | |
| 329,llama_2_7b,0.097,math,helm_lite_240610,[],math | |
| 330,luminous_supreme_70b,0.078,math,helm_lite_240610,[],math | |
| 331,cohere_command_light,0.098,math,helm_lite_240610,[],math | |
| 332,luminous_extended_30b,0.04,math,helm_lite_240610,[],math | |
| 333,falcon_7b,0.044,math,helm_lite_240610,[],math | |
| 334,olmo_7b,0.029,math,helm_lite_240610,[],math | |
| 335,luminous_base_13b,0.026,math,helm_lite_240610,[],math | |
| 336,gpt_4_0613,0.932,gsm8k,helm_lite_240610,[],math | |
| 337,llama_3_70b,0.805,gsm8k,helm_lite_240610,[],math | |
| 338,mixtral_8x22b,0.8,gsm8k,helm_lite_240610,[],math | |
| 339,palmyra_x_v3_72b,0.831,gsm8k,helm_lite_240610,[],math | |
| 340,gpt_4_turbo_1106_preview,0.668,gsm8k,helm_lite_240610,[],math | |
| 341,palm_2_unicorn,0.831,gsm8k,helm_lite_240610,[],math | |
| 342,claude_3_opus_20240229,0.924,gsm8k,helm_lite_240610,[],math | |
| 343,qwen1.5_72b,0.799,gsm8k,helm_lite_240610,[],math | |
| 344,palmyra_x_v2_33b,0.735,gsm8k,helm_lite_240610,[],math | |
| 345,yi_34b,0.648,gsm8k,helm_lite_240610,[],math | |
| 346,qwen1.5_32b,0.773,gsm8k,helm_lite_240610,[],math | |
| 347,claude_v1.3,0.784,gsm8k,helm_lite_240610,[],math | |
| 348,mixtral_8x7b_32k_seqlen,0.622,gsm8k,helm_lite_240610,[],math | |
| 349,palm_2_bison,0.61,gsm8k,helm_lite_240610,[],math | |
| 350,claude_2.0,0.583,gsm8k,helm_lite_240610,[],math | |
| 351,deepseek_llm_chat_67b,0.795,gsm8k,helm_lite_240610,[],math | |
| 352,llama_2_70b,0.567,gsm8k,helm_lite_240610,[],math | |
| 353,claude_2.1,0.604,gsm8k,helm_lite_240610,[],math | |
| 354,gpt_3.5_text_davinci_003,0.615,gsm8k,helm_lite_240610,[],math | |
| 355,qwen1.5_14b,0.693,gsm8k,helm_lite_240610,[],math | |
| 356,claude_instant_1.2,0.721,gsm8k,helm_lite_240610,[],math | |
| 357,llama_3_8b,0.499,gsm8k,helm_lite_240610,[],math | |
| 358,gpt_3.5_turbo_0613,0.501,gsm8k,helm_lite_240610,[],math | |
| 359,gemma_7b,0.559,gsm8k,helm_lite_240610,[],math | |
| 360,claude_3_sonnet_20240229,0.907,gsm8k,helm_lite_240610,[],math | |
| 361,gpt_3.5_text_davinci_002,0.479,gsm8k,helm_lite_240610,[],math | |
| 362,llama_65b,0.489,gsm8k,helm_lite_240610,[],math | |
| 363,mistral_large_2402,0.694,gsm8k,helm_lite_240610,[],math | |
| 364,cohere_command,0.452,gsm8k,helm_lite_240610,[],math | |
| 365,dbrx_instructruct,0.671,gsm8k,helm_lite_240610,[],math | |
| 366,mistral_v0.1_7b,0.377,gsm8k,helm_lite_240610,[],math | |
| 367,mistral_small_2402,0.734,gsm8k,helm_lite_240610,[],math | |
| 368,mistral_medium_2312,0.706,gsm8k,helm_lite_240610,[],math | |
| 369,qwen1.5_7b,0.6,gsm8k,helm_lite_240610,[],math | |
| 370,claude_3_haiku_20240307,0.699,gsm8k,helm_lite_240610,[],math | |
| 371,yi_6b,0.375,gsm8k,helm_lite_240610,[],math | |
| 372,llama_2_13b,0.266,gsm8k,helm_lite_240610,[],math | |
| 373,jurassic_2_jumbo_178b,0.239,gsm8k,helm_lite_240610,[],math | |
| 374,falcon_40b,0.267,gsm8k,helm_lite_240610,[],math | |
| 375,phi_2,0.581,gsm8k,helm_lite_240610,[],math | |
| 376,jurassic_2_grande_17b,0.159,gsm8k,helm_lite_240610,[],math | |
| 377,llama_2_7b,0.154,gsm8k,helm_lite_240610,[],math | |
| 378,luminous_supreme_70b,0.137,gsm8k,helm_lite_240610,[],math | |
| 379,cohere_command_light,0.149,gsm8k,helm_lite_240610,[],math | |
| 380,luminous_extended_30b,0.075,gsm8k,helm_lite_240610,[],math | |
| 381,falcon_7b,0.055,gsm8k,helm_lite_240610,[],math | |
| 382,olmo_7b,0.044,gsm8k,helm_lite_240610,[],math | |
| 383,luminous_base_13b,0.028,gsm8k,helm_lite_240610,[],math | |
| 384,gpt_4_0613,0.713,legalbench,helm_lite_240610,[],knowledge | |
| 385,llama_3_70b,0.733,legalbench,helm_lite_240610,[],knowledge | |
| 386,mixtral_8x22b,0.708,legalbench,helm_lite_240610,[],knowledge | |
| 387,palmyra_x_v3_72b,0.709,legalbench,helm_lite_240610,[],knowledge | |
| 388,gpt_4_turbo_1106_preview,0.626,legalbench,helm_lite_240610,[],knowledge | |
| 389,palm_2_unicorn,0.677,legalbench,helm_lite_240610,[],knowledge | |
| 390,claude_3_opus_20240229,0.662,legalbench,helm_lite_240610,[],knowledge | |
| 391,qwen1.5_72b,0.694,legalbench,helm_lite_240610,[],knowledge | |
| 392,palmyra_x_v2_33b,0.644,legalbench,helm_lite_240610,[],knowledge | |
| 393,yi_34b,0.618,legalbench,helm_lite_240610,[],knowledge | |
| 394,qwen1.5_32b,0.636,legalbench,helm_lite_240610,[],knowledge | |
| 395,claude_v1.3,0.629,legalbench,helm_lite_240610,[],knowledge | |
| 396,mixtral_8x7b_32k_seqlen,0.63,legalbench,helm_lite_240610,[],knowledge | |
| 397,palm_2_bison,0.645,legalbench,helm_lite_240610,[],knowledge | |
| 398,claude_2.0,0.643,legalbench,helm_lite_240610,[],knowledge | |
| 399,deepseek_llm_chat_67b,0.637,legalbench,helm_lite_240610,[],knowledge | |
| 400,llama_2_70b,0.673,legalbench,helm_lite_240610,[],knowledge | |
| 401,claude_2.1,0.643,legalbench,helm_lite_240610,[],knowledge | |
| 402,gpt_3.5_text_davinci_003,0.622,legalbench,helm_lite_240610,[],knowledge | |
| 403,qwen1.5_14b,0.593,legalbench,helm_lite_240610,[],knowledge | |
| 404,claude_instant_1.2,0.586,legalbench,helm_lite_240610,[],knowledge | |
| 405,llama_3_8b,0.637,legalbench,helm_lite_240610,[],knowledge | |
| 406,gpt_3.5_turbo_0613,0.528,legalbench,helm_lite_240610,[],knowledge | |
| 407,gemma_7b,0.581,legalbench,helm_lite_240610,[],knowledge | |
| 408,claude_3_sonnet_20240229,0.49,legalbench,helm_lite_240610,[],knowledge | |
| 409,gpt_3.5_text_davinci_002,0.58,legalbench,helm_lite_240610,[],knowledge | |
| 410,llama_65b,0.48,legalbench,helm_lite_240610,[],knowledge | |
| 411,mistral_large_2402,0.479,legalbench,helm_lite_240610,[],knowledge | |
| 412,cohere_command,0.578,legalbench,helm_lite_240610,[],knowledge | |
| 413,dbrx_instructruct,0.426,legalbench,helm_lite_240610,[],knowledge | |
| 414,mistral_v0.1_7b,0.58,legalbench,helm_lite_240610,[],knowledge | |
| 415,mistral_small_2402,0.389,legalbench,helm_lite_240610,[],knowledge | |
| 416,mistral_medium_2312,0.452,legalbench,helm_lite_240610,[],knowledge | |
| 417,qwen1.5_7b,0.523,legalbench,helm_lite_240610,[],knowledge | |
| 418,claude_3_haiku_20240307,0.46,legalbench,helm_lite_240610,[],knowledge | |
| 419,yi_6b,0.519,legalbench,helm_lite_240610,[],knowledge | |
| 420,llama_2_13b,0.591,legalbench,helm_lite_240610,[],knowledge | |
| 421,jurassic_2_jumbo_178b,0.533,legalbench,helm_lite_240610,[],knowledge | |
| 422,falcon_40b,0.442,legalbench,helm_lite_240610,[],knowledge | |
| 423,phi_2,0.334,legalbench,helm_lite_240610,[],knowledge | |
| 424,jurassic_2_grande_17b,0.468,legalbench,helm_lite_240610,[],knowledge | |
| 425,llama_2_7b,0.502,legalbench,helm_lite_240610,[],knowledge | |
| 426,luminous_supreme_70b,0.452,legalbench,helm_lite_240610,[],knowledge | |
| 427,cohere_command_light,0.397,legalbench,helm_lite_240610,[],knowledge | |
| 428,luminous_extended_30b,0.421,legalbench,helm_lite_240610,[],knowledge | |
| 429,falcon_7b,0.346,legalbench,helm_lite_240610,[],knowledge | |
| 430,olmo_7b,0.341,legalbench,helm_lite_240610,[],knowledge | |
| 431,luminous_base_13b,0.332,legalbench,helm_lite_240610,[],knowledge | |
| 432,gpt_4_0613,0.815,medqa,helm_lite_240610,[],knowledge | |
| 433,llama_3_70b,0.777,medqa,helm_lite_240610,[],knowledge | |
| 434,mixtral_8x22b,0.704,medqa,helm_lite_240610,[],knowledge | |
| 435,palmyra_x_v3_72b,0.684,medqa,helm_lite_240610,[],knowledge | |
| 436,gpt_4_turbo_1106_preview,0.817,medqa,helm_lite_240610,[],knowledge | |
| 437,palm_2_unicorn,0.684,medqa,helm_lite_240610,[],knowledge | |
| 438,claude_3_opus_20240229,0.775,medqa,helm_lite_240610,[],knowledge | |
| 439,qwen1.5_72b,0.67,medqa,helm_lite_240610,[],knowledge | |
| 440,palmyra_x_v2_33b,0.598,medqa,helm_lite_240610,[],knowledge | |
| 441,yi_34b,0.656,medqa,helm_lite_240610,[],knowledge | |
| 442,qwen1.5_32b,0.656,medqa,helm_lite_240610,[],knowledge | |
| 443,claude_v1.3,0.618,medqa,helm_lite_240610,[],knowledge | |
| 444,mixtral_8x7b_32k_seqlen,0.652,medqa,helm_lite_240610,[],knowledge | |
| 445,palm_2_bison,0.547,medqa,helm_lite_240610,[],knowledge | |
| 446,claude_2.0,0.652,medqa,helm_lite_240610,[],knowledge | |
| 447,deepseek_llm_chat_67b,0.628,medqa,helm_lite_240610,[],knowledge | |
| 448,llama_2_70b,0.618,medqa,helm_lite_240610,[],knowledge | |
| 449,claude_2.1,0.644,medqa,helm_lite_240610,[],knowledge | |
| 450,gpt_3.5_text_davinci_003,0.531,medqa,helm_lite_240610,[],knowledge | |
| 451,qwen1.5_14b,0.515,medqa,helm_lite_240610,[],knowledge | |
| 452,claude_instant_1.2,0.559,medqa,helm_lite_240610,[],knowledge | |
| 453,llama_3_8b,0.581,medqa,helm_lite_240610,[],knowledge | |
| 454,gpt_3.5_turbo_0613,0.622,medqa,helm_lite_240610,[],knowledge | |
| 455,gemma_7b,0.513,medqa,helm_lite_240610,[],knowledge | |
| 456,claude_3_sonnet_20240229,0.684,medqa,helm_lite_240610,[],knowledge | |
| 457,gpt_3.5_text_davinci_002,0.525,medqa,helm_lite_240610,[],knowledge | |
| 458,llama_65b,0.507,medqa,helm_lite_240610,[],knowledge | |
| 459,mistral_large_2402,0.499,medqa,helm_lite_240610,[],knowledge | |
| 460,cohere_command,0.445,medqa,helm_lite_240610,[],knowledge | |
| 461,dbrx_instructruct,0.694,medqa,helm_lite_240610,[],knowledge | |
| 462,mistral_v0.1_7b,0.525,medqa,helm_lite_240610,[],knowledge | |
| 463,mistral_small_2402,0.616,medqa,helm_lite_240610,[],knowledge | |
| 464,mistral_medium_2312,0.61,medqa,helm_lite_240610,[],knowledge | |
| 465,qwen1.5_7b,0.479,medqa,helm_lite_240610,[],knowledge | |
| 466,claude_3_haiku_20240307,0.702,medqa,helm_lite_240610,[],knowledge | |
| 467,yi_6b,0.497,medqa,helm_lite_240610,[],knowledge | |
| 468,llama_2_13b,0.392,medqa,helm_lite_240610,[],knowledge | |
| 469,jurassic_2_jumbo_178b,0.431,medqa,helm_lite_240610,[],knowledge | |
| 470,falcon_40b,0.419,medqa,helm_lite_240610,[],knowledge | |
| 471,phi_2,0.41,medqa,helm_lite_240610,[],knowledge | |
| 472,jurassic_2_grande_17b,0.39,medqa,helm_lite_240610,[],knowledge | |
| 473,llama_2_7b,0.392,medqa,helm_lite_240610,[],knowledge | |
| 474,luminous_supreme_70b,0.276,medqa,helm_lite_240610,[],knowledge | |
| 475,cohere_command_light,0.312,medqa,helm_lite_240610,[],knowledge | |
| 476,luminous_extended_30b,0.276,medqa,helm_lite_240610,[],knowledge | |
| 477,falcon_7b,0.254,medqa,helm_lite_240610,[],knowledge | |
| 478,olmo_7b,0.229,medqa,helm_lite_240610,[],knowledge | |
| 479,luminous_base_13b,0.26,medqa,helm_lite_240610,[],knowledge | |
| 480,gpt_4_0613,0.211,wmt_2014,helm_lite_240610,[],mt | |
| 481,llama_3_70b,0.225,wmt_2014,helm_lite_240610,[],mt | |
| 482,mixtral_8x22b,0.209,wmt_2014,helm_lite_240610,[],mt | |
| 483,palmyra_x_v3_72b,0.262,wmt_2014,helm_lite_240610,[],mt | |
| 484,gpt_4_turbo_1106_preview,0.205,wmt_2014,helm_lite_240610,[],mt | |
| 485,palm_2_unicorn,0.26,wmt_2014,helm_lite_240610,[],mt | |
| 486,claude_3_opus_20240229,0.24,wmt_2014,helm_lite_240610,[],mt | |
| 487,qwen1.5_72b,0.201,wmt_2014,helm_lite_240610,[],mt | |
| 488,palmyra_x_v2_33b,0.239,wmt_2014,helm_lite_240610,[],mt | |
| 489,yi_34b,0.172,wmt_2014,helm_lite_240610,[],mt | |
| 490,qwen1.5_32b,0.193,wmt_2014,helm_lite_240610,[],mt | |
| 491,claude_v1.3,0.219,wmt_2014,helm_lite_240610,[],mt | |
| 492,mixtral_8x7b_32k_seqlen,0.19,wmt_2014,helm_lite_240610,[],mt | |
| 493,palm_2_bison,0.241,wmt_2014,helm_lite_240610,[],mt | |
| 494,claude_2.0,0.219,wmt_2014,helm_lite_240610,[],mt | |
| 495,deepseek_llm_chat_67b,0.186,wmt_2014,helm_lite_240610,[],mt | |
| 496,llama_2_70b,0.196,wmt_2014,helm_lite_240610,[],mt | |
| 497,claude_2.1,0.204,wmt_2014,helm_lite_240610,[],mt | |
| 498,gpt_3.5_text_davinci_003,0.191,wmt_2014,helm_lite_240610,[],mt | |
| 499,qwen1.5_14b,0.178,wmt_2014,helm_lite_240610,[],mt | |
| 500,claude_instant_1.2,0.194,wmt_2014,helm_lite_240610,[],mt | |
| 501,llama_3_8b,0.183,wmt_2014,helm_lite_240610,[],mt | |
| 502,gpt_3.5_turbo_0613,0.187,wmt_2014,helm_lite_240610,[],mt | |
| 503,gemma_7b,0.187,wmt_2014,helm_lite_240610,[],mt | |
| 504,claude_3_sonnet_20240229,0.218,wmt_2014,helm_lite_240610,[],mt | |
| 505,gpt_3.5_text_davinci_002,0.174,wmt_2014,helm_lite_240610,[],mt | |
| 506,llama_65b,0.189,wmt_2014,helm_lite_240610,[],mt | |
| 507,mistral_large_2402,0.182,wmt_2014,helm_lite_240610,[],mt | |
| 508,cohere_command,0.088,wmt_2014,helm_lite_240610,[],mt | |
| 509,dbrx_instructruct,0.131,wmt_2014,helm_lite_240610,[],mt | |
| 510,mistral_v0.1_7b,0.16,wmt_2014,helm_lite_240610,[],mt | |
| 511,mistral_small_2402,0.169,wmt_2014,helm_lite_240610,[],mt | |
| 512,mistral_medium_2312,0.169,wmt_2014,helm_lite_240610,[],mt | |
| 513,qwen1.5_7b,0.153,wmt_2014,helm_lite_240610,[],mt | |
| 514,claude_3_haiku_20240307,0.148,wmt_2014,helm_lite_240610,[],mt | |
| 515,yi_6b,0.117,wmt_2014,helm_lite_240610,[],mt | |
| 516,llama_2_13b,0.167,wmt_2014,helm_lite_240610,[],mt | |
| 517,jurassic_2_jumbo_178b,0.114,wmt_2014,helm_lite_240610,[],mt | |
| 518,falcon_40b,0.162,wmt_2014,helm_lite_240610,[],mt | |
| 519,phi_2,0.038,wmt_2014,helm_lite_240610,[],mt | |
| 520,jurassic_2_grande_17b,0.102,wmt_2014,helm_lite_240610,[],mt | |
| 521,llama_2_7b,0.144,wmt_2014,helm_lite_240610,[],mt | |
| 522,luminous_supreme_70b,0.102,wmt_2014,helm_lite_240610,[],mt | |
| 523,cohere_command_light,0.023,wmt_2014,helm_lite_240610,[],mt | |
| 524,luminous_extended_30b,0.083,wmt_2014,helm_lite_240610,[],mt | |
| 525,falcon_7b,0.094,wmt_2014,helm_lite_240610,[],mt | |
| 526,olmo_7b,0.097,wmt_2014,helm_lite_240610,[],mt | |
| 527,luminous_base_13b,0.066,wmt_2014,helm_lite_240610,[],mt | |
| 0,llama_2_70b,0.944,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 1,llama_65b,0.908,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 2,text_davinci_002,0.905,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 3,mistral_v0.1_7b,0.884,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 4,cohere_command_beta_52.4b,0.874,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 5,text_davinci_003,0.872,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 6,jurassic_2_jumbo_178b,0.824,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 7,llama_2_13b,0.823,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 8,tnlg_v2_530b,0.787,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 9,gpt_3.5_turbo_0613,0.783,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 10,llama_30b,0.781,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 11,anthropic_lm_v4_s3_52b,0.78,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 12,gpt_3.5_turbo_0301,0.76,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 13,jurassic_2_grande_17b,0.743,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 14,palmyra_x_43b,0.732,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 15,falcon_40b,0.729,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 16,falcon_instruct_40b,0.727,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 17,mpt_instruct_30b,0.716,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 18,mpt_30b,0.714,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 19,j1_grande_v2_beta_17b,0.706,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 20,vicuna_v1.3_13b,0.706,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 21,cohere_command_beta_6.1b,0.675,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 22,cohere_xlarge_v20221108_52.4b,0.664,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 23,luminous_supreme_70b,0.662,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 24,vicuna_v1.3_7b,0.625,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 25,opt_175b,0.609,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 26,llama_2_7b,0.607,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 27,llama_13b,0.595,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 28,instructpalmyra_30b,0.568,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 29,cohere_xlarge_v20220609_52.4b,0.56,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 30,jurassic_2_large_7.5b,0.553,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 31,davinci_175b,0.538,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 32,llama_7b,0.533,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 33,redpajama_incite_instruct_7b,0.524,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 34,j1_jumbo_v1_178b,0.517,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 35,glm_130b,0.512,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 36,luminous_extended_30b,0.485,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 37,opt_66b,0.448,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 38,bloom_176b,0.446,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 39,j1_grande_v1_17b,0.433,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 40,alpaca_7b,0.381,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 41,falcon_7b,0.378,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 42,redpajama_incite_base_7b,0.378,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 43,cohere_large_v20220720_13.1b,0.372,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 44,redpajama_incite_instruct_v1_3b,0.366,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 45,text_curie_001,0.36,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 46,gpt_neox_20b,0.351,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 47,luminous_base_13b,0.315,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 48,cohere_medium_v20221108_6.1b,0.312,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 49,redpajama_incite_base_v1_3b,0.311,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 50,tnlg_v2_6.7b,0.309,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 51,j1_large_v1_7.5b,0.285,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 52,gpt_j_6b,0.273,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 53,pythia_12b,0.257,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 54,curie_6.7b,0.247,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 55,falcon_instruct_7b,0.244,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 56,cohere_medium_v20220720_6.1b,0.23,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 57,text_babbage_001,0.229,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 58,t0pp_11b,0.197,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 59,pythia_6.9b,0.196,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 60,ul2_20b,0.167,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 61,t5_11b,0.131,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 62,babbage_1.3b,0.114,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 63,cohere_small_v20220720_410m,0.109,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 64,ada_350m,0.108,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 65,text_ada_001,0.107,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 66,yalm_100b,0.075,helm_mwr,helm_classic_240130,"['mmlu', 'boolq', 'narrativeqa', 'naturalquestions_closed', 'naturalquestions_open', 'quac', 'hellaswag', 'openbookqa', 'truthfulqa', 'ms_marco_regular', 'ms_marco_trec', 'cnn/dailymail', 'xsum', 'imdb', 'civilcomments', 'raft']",holistic | |
| 67,llama_2_70b,0.582,mmlu,helm_classic_240130,[],knowledge | |
| 68,llama_65b,0.584,mmlu,helm_classic_240130,[],knowledge | |
| 69,text_davinci_002,0.568,mmlu,helm_classic_240130,[],knowledge | |
| 70,mistral_v0.1_7b,0.572,mmlu,helm_classic_240130,[],knowledge | |
| 71,cohere_command_beta_52.4b,0.452,mmlu,helm_classic_240130,[],knowledge | |
| 72,text_davinci_003,0.569,mmlu,helm_classic_240130,[],knowledge | |
| 73,jurassic_2_jumbo_178b,0.48,mmlu,helm_classic_240130,[],knowledge | |
| 74,llama_2_13b,0.507,mmlu,helm_classic_240130,[],knowledge | |
| 75,tnlg_v2_530b,0.469,mmlu,helm_classic_240130,[],knowledge | |
| 76,gpt_3.5_turbo_0613,0.391,mmlu,helm_classic_240130,[],knowledge | |
| 77,llama_30b,0.531,mmlu,helm_classic_240130,[],knowledge | |
| 78,anthropic_lm_v4_s3_52b,0.481,mmlu,helm_classic_240130,[],knowledge | |
| 79,gpt_3.5_turbo_0301,0.59,mmlu,helm_classic_240130,[],knowledge | |
| 80,jurassic_2_grande_17b,0.475,mmlu,helm_classic_240130,[],knowledge | |
| 81,palmyra_x_43b,0.609,mmlu,helm_classic_240130,[],knowledge | |
| 82,falcon_40b,0.509,mmlu,helm_classic_240130,[],knowledge | |
| 83,falcon_instruct_40b,0.497,mmlu,helm_classic_240130,[],knowledge | |
| 84,mpt_instruct_30b,0.444,mmlu,helm_classic_240130,[],knowledge | |
| 85,mpt_30b,0.437,mmlu,helm_classic_240130,[],knowledge | |
| 86,j1_grande_v2_beta_17b,0.445,mmlu,helm_classic_240130,[],knowledge | |
| 87,vicuna_v1.3_13b,0.462,mmlu,helm_classic_240130,[],knowledge | |
| 88,cohere_command_beta_6.1b,0.406,mmlu,helm_classic_240130,[],knowledge | |
| 89,cohere_xlarge_v20221108_52.4b,0.382,mmlu,helm_classic_240130,[],knowledge | |
| 90,luminous_supreme_70b,0.38,mmlu,helm_classic_240130,[],knowledge | |
| 91,vicuna_v1.3_7b,0.434,mmlu,helm_classic_240130,[],knowledge | |
| 92,opt_175b,0.318,mmlu,helm_classic_240130,[],knowledge | |
| 93,llama_2_7b,0.431,mmlu,helm_classic_240130,[],knowledge | |
| 94,llama_13b,0.422,mmlu,helm_classic_240130,[],knowledge | |
| 95,instructpalmyra_30b,0.403,mmlu,helm_classic_240130,[],knowledge | |
| 96,cohere_xlarge_v20220609_52.4b,0.353,mmlu,helm_classic_240130,[],knowledge | |
| 97,jurassic_2_large_7.5b,0.339,mmlu,helm_classic_240130,[],knowledge | |
| 98,davinci_175b,0.422,mmlu,helm_classic_240130,[],knowledge | |
| 99,llama_7b,0.321,mmlu,helm_classic_240130,[],knowledge | |
| 100,redpajama_incite_instruct_7b,0.363,mmlu,helm_classic_240130,[],knowledge | |
| 101,j1_jumbo_v1_178b,0.259,mmlu,helm_classic_240130,[],knowledge | |
| 102,glm_130b,0.344,mmlu,helm_classic_240130,[],knowledge | |
| 103,luminous_extended_30b,0.321,mmlu,helm_classic_240130,[],knowledge | |
| 104,opt_66b,0.276,mmlu,helm_classic_240130,[],knowledge | |
| 105,bloom_176b,0.299,mmlu,helm_classic_240130,[],knowledge | |
| 106,j1_grande_v1_17b,0.27,mmlu,helm_classic_240130,[],knowledge | |
| 107,alpaca_7b,0.385,mmlu,helm_classic_240130,[],knowledge | |
| 108,falcon_7b,0.286,mmlu,helm_classic_240130,[],knowledge | |
| 109,redpajama_incite_base_7b,0.302,mmlu,helm_classic_240130,[],knowledge | |
| 110,cohere_large_v20220720_13.1b,0.324,mmlu,helm_classic_240130,[],knowledge | |
| 111,redpajama_incite_instruct_v1_3b,0.257,mmlu,helm_classic_240130,[],knowledge | |
| 112,text_curie_001,0.237,mmlu,helm_classic_240130,[],knowledge | |
| 113,gpt_neox_20b,0.276,mmlu,helm_classic_240130,[],knowledge | |
| 114,luminous_base_13b,0.27,mmlu,helm_classic_240130,[],knowledge | |
| 115,cohere_medium_v20221108_6.1b,0.254,mmlu,helm_classic_240130,[],knowledge | |
| 116,redpajama_incite_base_v1_3b,0.263,mmlu,helm_classic_240130,[],knowledge | |
| 117,tnlg_v2_6.7b,0.242,mmlu,helm_classic_240130,[],knowledge | |
| 118,j1_large_v1_7.5b,0.241,mmlu,helm_classic_240130,[],knowledge | |
| 119,gpt_j_6b,0.249,mmlu,helm_classic_240130,[],knowledge | |
| 120,pythia_12b,0.274,mmlu,helm_classic_240130,[],knowledge | |
| 121,curie_6.7b,0.243,mmlu,helm_classic_240130,[],knowledge | |
| 122,falcon_instruct_7b,0.275,mmlu,helm_classic_240130,[],knowledge | |
| 123,cohere_medium_v20220720_6.1b,0.279,mmlu,helm_classic_240130,[],knowledge | |
| 124,text_babbage_001,0.229,mmlu,helm_classic_240130,[],knowledge | |
| 125,t0pp_11b,0.407,mmlu,helm_classic_240130,[],knowledge | |
| 126,pythia_6.9b,0.236,mmlu,helm_classic_240130,[],knowledge | |
| 127,ul2_20b,0.291,mmlu,helm_classic_240130,[],knowledge | |
| 128,t5_11b,0.29,mmlu,helm_classic_240130,[],knowledge | |
| 129,babbage_1.3b,0.235,mmlu,helm_classic_240130,[],knowledge | |
| 130,cohere_small_v20220720_410m,0.264,mmlu,helm_classic_240130,[],knowledge | |
| 131,ada_350m,0.243,mmlu,helm_classic_240130,[],knowledge | |
| 132,text_ada_001,0.238,mmlu,helm_classic_240130,[],knowledge | |
| 133,yalm_100b,0.243,mmlu,helm_classic_240130,[],knowledge | |
| 134,llama_2_70b,0.886,boolq,helm_classic_240130,[],knowledge | |
| 135,llama_65b,0.871,boolq,helm_classic_240130,[],knowledge | |
| 136,text_davinci_002,0.877,boolq,helm_classic_240130,[],knowledge | |
| 137,mistral_v0.1_7b,0.874,boolq,helm_classic_240130,[],knowledge | |
| 138,cohere_command_beta_52.4b,0.856,boolq,helm_classic_240130,[],knowledge | |
| 139,text_davinci_003,0.881,boolq,helm_classic_240130,[],knowledge | |
| 140,jurassic_2_jumbo_178b,0.829,boolq,helm_classic_240130,[],knowledge | |
| 141,llama_2_13b,0.811,boolq,helm_classic_240130,[],knowledge | |
| 142,tnlg_v2_530b,0.809,boolq,helm_classic_240130,[],knowledge | |
| 143,gpt_3.5_turbo_0613,0.87,boolq,helm_classic_240130,[],knowledge | |
| 144,llama_30b,0.861,boolq,helm_classic_240130,[],knowledge | |
| 145,anthropic_lm_v4_s3_52b,0.815,boolq,helm_classic_240130,[],knowledge | |
| 146,gpt_3.5_turbo_0301,0.74,boolq,helm_classic_240130,[],knowledge | |
| 147,jurassic_2_grande_17b,0.826,boolq,helm_classic_240130,[],knowledge | |
| 148,palmyra_x_43b,0.896,boolq,helm_classic_240130,[],knowledge | |
| 149,falcon_40b,0.819,boolq,helm_classic_240130,[],knowledge | |
| 150,falcon_instruct_40b,0.829,boolq,helm_classic_240130,[],knowledge | |
| 151,mpt_instruct_30b,0.85,boolq,helm_classic_240130,[],knowledge | |
| 152,mpt_30b,0.704,boolq,helm_classic_240130,[],knowledge | |
| 153,j1_grande_v2_beta_17b,0.812,boolq,helm_classic_240130,[],knowledge | |
| 154,vicuna_v1.3_13b,0.808,boolq,helm_classic_240130,[],knowledge | |
| 155,cohere_command_beta_6.1b,0.798,boolq,helm_classic_240130,[],knowledge | |
| 156,cohere_xlarge_v20221108_52.4b,0.762,boolq,helm_classic_240130,[],knowledge | |
| 157,luminous_supreme_70b,0.775,boolq,helm_classic_240130,[],knowledge | |
| 158,vicuna_v1.3_7b,0.76,boolq,helm_classic_240130,[],knowledge | |
| 159,opt_175b,0.793,boolq,helm_classic_240130,[],knowledge | |
| 160,llama_2_7b,0.762,boolq,helm_classic_240130,[],knowledge | |
| 161,llama_13b,0.714,boolq,helm_classic_240130,[],knowledge | |
| 162,instructpalmyra_30b,0.751,boolq,helm_classic_240130,[],knowledge | |
| 163,cohere_xlarge_v20220609_52.4b,0.718,boolq,helm_classic_240130,[],knowledge | |
| 164,jurassic_2_large_7.5b,0.742,boolq,helm_classic_240130,[],knowledge | |
| 165,davinci_175b,0.722,boolq,helm_classic_240130,[],knowledge | |
| 166,llama_7b,0.756,boolq,helm_classic_240130,[],knowledge | |
| 167,redpajama_incite_instruct_7b,0.705,boolq,helm_classic_240130,[],knowledge | |
| 168,j1_jumbo_v1_178b,0.776,boolq,helm_classic_240130,[],knowledge | |
| 169,glm_130b,0.784,boolq,helm_classic_240130,[],knowledge | |
| 170,luminous_extended_30b,0.767,boolq,helm_classic_240130,[],knowledge | |
| 171,opt_66b,0.76,boolq,helm_classic_240130,[],knowledge | |
| 172,bloom_176b,0.704,boolq,helm_classic_240130,[],knowledge | |
| 173,j1_grande_v1_17b,0.722,boolq,helm_classic_240130,[],knowledge | |
| 174,alpaca_7b,0.778,boolq,helm_classic_240130,[],knowledge | |
| 175,falcon_7b,0.753,boolq,helm_classic_240130,[],knowledge | |
| 176,redpajama_incite_base_7b,0.713,boolq,helm_classic_240130,[],knowledge | |
| 177,cohere_large_v20220720_13.1b,0.725,boolq,helm_classic_240130,[],knowledge | |
| 178,redpajama_incite_instruct_v1_3b,0.677,boolq,helm_classic_240130,[],knowledge | |
| 179,text_curie_001,0.62,boolq,helm_classic_240130,[],knowledge | |
| 180,gpt_neox_20b,0.683,boolq,helm_classic_240130,[],knowledge | |
| 181,luminous_base_13b,0.719,boolq,helm_classic_240130,[],knowledge | |
| 182,cohere_medium_v20221108_6.1b,0.7,boolq,helm_classic_240130,[],knowledge | |
| 183,redpajama_incite_base_v1_3b,0.685,boolq,helm_classic_240130,[],knowledge | |
| 184,tnlg_v2_6.7b,0.698,boolq,helm_classic_240130,[],knowledge | |
| 185,j1_large_v1_7.5b,0.683,boolq,helm_classic_240130,[],knowledge | |
| 186,gpt_j_6b,0.649,boolq,helm_classic_240130,[],knowledge | |
| 187,pythia_12b,0.662,boolq,helm_classic_240130,[],knowledge | |
| 188,curie_6.7b,0.656,boolq,helm_classic_240130,[],knowledge | |
| 189,falcon_instruct_7b,0.72,boolq,helm_classic_240130,[],knowledge | |
| 190,cohere_medium_v20220720_6.1b,0.659,boolq,helm_classic_240130,[],knowledge | |
| 191,text_babbage_001,0.451,boolq,helm_classic_240130,[],knowledge | |
| 192,t0pp_11b,0.0,boolq,helm_classic_240130,[],knowledge | |
| 193,pythia_6.9b,0.631,boolq,helm_classic_240130,[],knowledge | |
| 194,ul2_20b,0.746,boolq,helm_classic_240130,[],knowledge | |
| 195,t5_11b,0.761,boolq,helm_classic_240130,[],knowledge | |
| 196,babbage_1.3b,0.574,boolq,helm_classic_240130,[],knowledge | |
| 197,cohere_small_v20220720_410m,0.457,boolq,helm_classic_240130,[],knowledge | |
| 198,ada_350m,0.581,boolq,helm_classic_240130,[],knowledge | |
| 199,text_ada_001,0.464,boolq,helm_classic_240130,[],knowledge | |
| 200,yalm_100b,0.634,boolq,helm_classic_240130,[],knowledge | |
| 201,llama_2_70b,0.77,narrativeqa,helm_classic_240130,[],knowledge | |
| 202,llama_65b,0.755,narrativeqa,helm_classic_240130,[],knowledge | |
| 203,text_davinci_002,0.727,narrativeqa,helm_classic_240130,[],knowledge | |
| 204,mistral_v0.1_7b,0.716,narrativeqa,helm_classic_240130,[],knowledge | |
| 205,cohere_command_beta_52.4b,0.752,narrativeqa,helm_classic_240130,[],knowledge | |
| 206,text_davinci_003,0.727,narrativeqa,helm_classic_240130,[],knowledge | |
| 207,jurassic_2_jumbo_178b,0.733,narrativeqa,helm_classic_240130,[],knowledge | |
| 208,llama_2_13b,0.744,narrativeqa,helm_classic_240130,[],knowledge | |
| 209,tnlg_v2_530b,0.722,narrativeqa,helm_classic_240130,[],knowledge | |
| 210,gpt_3.5_turbo_0613,0.625,narrativeqa,helm_classic_240130,[],knowledge | |
| 211,llama_30b,0.752,narrativeqa,helm_classic_240130,[],knowledge | |
| 212,anthropic_lm_v4_s3_52b,0.728,narrativeqa,helm_classic_240130,[],knowledge | |
| 213,gpt_3.5_turbo_0301,0.663,narrativeqa,helm_classic_240130,[],knowledge | |
| 214,jurassic_2_grande_17b,0.737,narrativeqa,helm_classic_240130,[],knowledge | |
| 215,palmyra_x_43b,0.742,narrativeqa,helm_classic_240130,[],knowledge | |
| 216,falcon_40b,0.673,narrativeqa,helm_classic_240130,[],knowledge | |
| 217,falcon_instruct_40b,0.625,narrativeqa,helm_classic_240130,[],knowledge | |
| 218,mpt_instruct_30b,0.733,narrativeqa,helm_classic_240130,[],knowledge | |
| 219,mpt_30b,0.732,narrativeqa,helm_classic_240130,[],knowledge | |
| 220,j1_grande_v2_beta_17b,0.725,narrativeqa,helm_classic_240130,[],knowledge | |
| 221,vicuna_v1.3_13b,0.691,narrativeqa,helm_classic_240130,[],knowledge | |
| 222,cohere_command_beta_6.1b,0.709,narrativeqa,helm_classic_240130,[],knowledge | |
| 223,cohere_xlarge_v20221108_52.4b,0.672,narrativeqa,helm_classic_240130,[],knowledge | |
| 224,luminous_supreme_70b,0.711,narrativeqa,helm_classic_240130,[],knowledge | |
| 225,vicuna_v1.3_7b,0.643,narrativeqa,helm_classic_240130,[],knowledge | |
| 226,opt_175b,0.671,narrativeqa,helm_classic_240130,[],knowledge | |
| 227,llama_2_7b,0.691,narrativeqa,helm_classic_240130,[],knowledge | |
| 228,llama_13b,0.711,narrativeqa,helm_classic_240130,[],knowledge | |
| 229,instructpalmyra_30b,0.496,narrativeqa,helm_classic_240130,[],knowledge | |
| 230,cohere_xlarge_v20220609_52.4b,0.65,narrativeqa,helm_classic_240130,[],knowledge | |
| 232,davinci_175b,0.687,narrativeqa,helm_classic_240130,[],knowledge | |
| 233,llama_7b,0.669,narrativeqa,helm_classic_240130,[],knowledge | |
| 234,redpajama_incite_instruct_7b,0.638,narrativeqa,helm_classic_240130,[],knowledge | |
| 235,j1_jumbo_v1_178b,0.695,narrativeqa,helm_classic_240130,[],knowledge | |
| 236,glm_130b,0.706,narrativeqa,helm_classic_240130,[],knowledge | |
| 237,luminous_extended_30b,0.665,narrativeqa,helm_classic_240130,[],knowledge | |
| 238,opt_66b,0.638,narrativeqa,helm_classic_240130,[],knowledge | |
| 239,bloom_176b,0.662,narrativeqa,helm_classic_240130,[],knowledge | |
| 240,j1_grande_v1_17b,0.672,narrativeqa,helm_classic_240130,[],knowledge | |
| 241,alpaca_7b,0.396,narrativeqa,helm_classic_240130,[],knowledge | |
| 242,falcon_7b,0.621,narrativeqa,helm_classic_240130,[],knowledge | |
| 243,redpajama_incite_base_7b,0.617,narrativeqa,helm_classic_240130,[],knowledge | |
| 244,cohere_large_v20220720_13.1b,0.625,narrativeqa,helm_classic_240130,[],knowledge | |
| 245,redpajama_incite_instruct_v1_3b,0.638,narrativeqa,helm_classic_240130,[],knowledge | |
| 246,text_curie_001,0.582,narrativeqa,helm_classic_240130,[],knowledge | |
| 247,gpt_neox_20b,0.599,narrativeqa,helm_classic_240130,[],knowledge | |
| 248,luminous_base_13b,0.605,narrativeqa,helm_classic_240130,[],knowledge | |
| 249,cohere_medium_v20221108_6.1b,0.61,narrativeqa,helm_classic_240130,[],knowledge | |
| 250,redpajama_incite_base_v1_3b,0.555,narrativeqa,helm_classic_240130,[],knowledge | |
| 251,tnlg_v2_6.7b,0.631,narrativeqa,helm_classic_240130,[],knowledge | |
| 252,j1_large_v1_7.5b,0.623,narrativeqa,helm_classic_240130,[],knowledge | |
| 253,gpt_j_6b,0.545,narrativeqa,helm_classic_240130,[],knowledge | |
| 254,pythia_12b,0.596,narrativeqa,helm_classic_240130,[],knowledge | |
| 255,curie_6.7b,0.604,narrativeqa,helm_classic_240130,[],knowledge | |
| 256,falcon_instruct_7b,0.476,narrativeqa,helm_classic_240130,[],knowledge | |
| 257,cohere_medium_v20220720_6.1b,0.559,narrativeqa,helm_classic_240130,[],knowledge | |
| 258,text_babbage_001,0.429,narrativeqa,helm_classic_240130,[],knowledge | |
| 259,t0pp_11b,0.151,narrativeqa,helm_classic_240130,[],knowledge | |
| 260,pythia_6.9b,0.528,narrativeqa,helm_classic_240130,[],knowledge | |
| 261,ul2_20b,0.083,narrativeqa,helm_classic_240130,[],knowledge | |
| 262,t5_11b,0.086,narrativeqa,helm_classic_240130,[],knowledge | |
| 263,babbage_1.3b,0.491,narrativeqa,helm_classic_240130,[],knowledge | |
| 264,cohere_small_v20220720_410m,0.294,narrativeqa,helm_classic_240130,[],knowledge | |
| 265,ada_350m,0.326,narrativeqa,helm_classic_240130,[],knowledge | |
| 266,text_ada_001,0.238,narrativeqa,helm_classic_240130,[],knowledge | |
| 267,yalm_100b,0.252,narrativeqa,helm_classic_240130,[],knowledge | |
| 268,llama_2_70b,0.458,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 269,llama_65b,0.431,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 270,text_davinci_002,0.383,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 271,mistral_v0.1_7b,0.365,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 272,cohere_command_beta_52.4b,0.372,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 273,text_davinci_003,0.406,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 274,jurassic_2_jumbo_178b,0.385,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 275,llama_2_13b,0.376,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 276,tnlg_v2_530b,0.384,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 277,gpt_3.5_turbo_0613,0.348,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 278,llama_30b,0.408,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 279,anthropic_lm_v4_s3_52b,0.288,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 280,gpt_3.5_turbo_0301,0.39,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 281,jurassic_2_grande_17b,0.356,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 282,palmyra_x_43b,0.413,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 283,falcon_40b,0.392,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 284,falcon_instruct_40b,0.377,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 285,mpt_instruct_30b,0.304,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 286,mpt_30b,0.347,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 287,j1_grande_v2_beta_17b,0.337,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 288,vicuna_v1.3_13b,0.346,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 289,cohere_command_beta_6.1b,0.229,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 290,cohere_xlarge_v20221108_52.4b,0.361,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 291,luminous_supreme_70b,0.293,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 292,vicuna_v1.3_7b,0.287,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 293,opt_175b,0.297,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 294,llama_2_7b,0.337,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 295,llama_13b,0.346,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 296,instructpalmyra_30b,0.33,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 297,cohere_xlarge_v20220609_52.4b,0.312,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 298,jurassic_2_large_7.5b,0.274,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 299,davinci_175b,0.329,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 300,llama_7b,0.297,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 301,redpajama_incite_instruct_7b,0.232,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 302,j1_jumbo_v1_178b,0.293,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 303,glm_130b,0.148,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 304,luminous_extended_30b,0.254,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 305,opt_66b,0.258,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 306,bloom_176b,0.216,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 307,j1_grande_v1_17b,0.233,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 308,alpaca_7b,0.266,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 309,falcon_7b,0.285,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 310,redpajama_incite_base_7b,0.25,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 311,cohere_large_v20220720_13.1b,0.232,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 312,redpajama_incite_instruct_v1_3b,0.203,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 313,text_curie_001,0.175,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 314,gpt_neox_20b,0.193,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 315,luminous_base_13b,0.202,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 316,cohere_medium_v20221108_6.1b,0.199,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 317,redpajama_incite_base_v1_3b,0.207,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 318,tnlg_v2_6.7b,0.21,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 319,j1_large_v1_7.5b,0.19,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 320,gpt_j_6b,0.156,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 321,pythia_12b,0.175,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 322,curie_6.7b,0.199,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 323,falcon_instruct_7b,0.194,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 324,cohere_medium_v20220720_6.1b,0.177,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 325,text_babbage_001,0.07,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 326,t0pp_11b,0.039,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 327,pythia_6.9b,0.142,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 328,ul2_20b,0.204,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 329,t5_11b,0.194,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 330,babbage_1.3b,0.119,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 331,cohere_small_v20220720_410m,0.078,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 332,ada_350m,0.082,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 333,text_ada_001,0.025,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 334,yalm_100b,0.068,naturalquestions_closed,helm_classic_240130,[],knowledge | |
| 335,llama_2_70b,0.674,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 336,llama_65b,0.672,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 337,text_davinci_002,0.713,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 338,mistral_v0.1_7b,0.687,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 339,cohere_command_beta_52.4b,0.76,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 340,text_davinci_003,0.77,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 341,jurassic_2_jumbo_178b,0.669,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 342,llama_2_13b,0.637,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 343,tnlg_v2_530b,0.642,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 344,gpt_3.5_turbo_0613,0.675,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 345,llama_30b,0.666,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 346,anthropic_lm_v4_s3_52b,0.686,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 347,gpt_3.5_turbo_0301,0.624,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 348,jurassic_2_grande_17b,0.639,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 350,falcon_40b,0.675,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 351,falcon_instruct_40b,0.666,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 352,mpt_instruct_30b,0.697,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 353,mpt_30b,0.673,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 354,j1_grande_v2_beta_17b,0.625,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 355,vicuna_v1.3_13b,0.686,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 356,cohere_command_beta_6.1b,0.717,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 357,cohere_xlarge_v20221108_52.4b,0.628,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 358,luminous_supreme_70b,0.649,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 359,vicuna_v1.3_7b,0.634,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 360,opt_175b,0.615,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 361,llama_2_7b,0.611,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 362,llama_13b,0.614,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 363,instructpalmyra_30b,0.682,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 364,cohere_xlarge_v20220609_52.4b,0.595,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 365,jurassic_2_large_7.5b,0.589,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 366,davinci_175b,0.625,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 367,llama_7b,0.589,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 368,redpajama_incite_instruct_7b,0.659,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 369,j1_jumbo_v1_178b,0.595,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 370,glm_130b,0.642,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 371,luminous_extended_30b,0.609,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 372,opt_66b,0.596,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 373,bloom_176b,0.621,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 374,j1_grande_v1_17b,0.578,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 375,alpaca_7b,0.592,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 376,falcon_7b,0.579,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 377,redpajama_incite_base_7b,0.586,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 378,cohere_large_v20220720_13.1b,0.573,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 379,redpajama_incite_instruct_v1_3b,0.637,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 380,text_curie_001,0.571,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 381,gpt_neox_20b,0.596,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 382,luminous_base_13b,0.568,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 383,cohere_medium_v20221108_6.1b,0.517,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 384,redpajama_incite_base_v1_3b,0.52,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 385,tnlg_v2_6.7b,0.561,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 386,j1_large_v1_7.5b,0.532,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 387,gpt_j_6b,0.559,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 388,pythia_12b,0.581,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 389,curie_6.7b,0.552,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 390,falcon_instruct_7b,0.449,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 391,cohere_medium_v20220720_6.1b,0.504,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 392,text_babbage_001,0.33,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 393,t0pp_11b,0.19,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 394,pythia_6.9b,0.539,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 395,ul2_20b,0.349,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 396,t5_11b,0.477,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 397,babbage_1.3b,0.451,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 398,cohere_small_v20220720_410m,0.309,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 399,ada_350m,0.365,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 400,text_ada_001,0.149,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 401,yalm_100b,0.227,naturalquestions_open,helm_classic_240130,[],knowledge | |
| 402,llama_2_70b,0.484,quac,helm_classic_240130,[],other | |
| 403,llama_65b,0.401,quac,helm_classic_240130,[],other | |
| 404,text_davinci_002,0.445,quac,helm_classic_240130,[],other | |
| 405,mistral_v0.1_7b,0.423,quac,helm_classic_240130,[],other | |
| 406,cohere_command_beta_52.4b,0.432,quac,helm_classic_240130,[],other | |
| 407,text_davinci_003,0.525,quac,helm_classic_240130,[],other | |
| 408,jurassic_2_jumbo_178b,0.435,quac,helm_classic_240130,[],other | |
| 409,llama_2_13b,0.424,quac,helm_classic_240130,[],other | |
| 410,tnlg_v2_530b,0.39,quac,helm_classic_240130,[],other | |
| 411,gpt_3.5_turbo_0613,0.485,quac,helm_classic_240130,[],other | |
| 412,llama_30b,0.39,quac,helm_classic_240130,[],other | |
| 413,anthropic_lm_v4_s3_52b,0.431,quac,helm_classic_240130,[],other | |
| 414,gpt_3.5_turbo_0301,0.512,quac,helm_classic_240130,[],other | |
| 415,jurassic_2_grande_17b,0.418,quac,helm_classic_240130,[],other | |
| 416,palmyra_x_43b,0.473,quac,helm_classic_240130,[],other | |
| 417,falcon_40b,0.307,quac,helm_classic_240130,[],other | |
| 418,falcon_instruct_40b,0.371,quac,helm_classic_240130,[],other | |
| 419,mpt_instruct_30b,0.327,quac,helm_classic_240130,[],other | |
| 420,mpt_30b,0.393,quac,helm_classic_240130,[],other | |
| 421,j1_grande_v2_beta_17b,0.392,quac,helm_classic_240130,[],other | |
| 422,vicuna_v1.3_13b,0.403,quac,helm_classic_240130,[],other | |
| 423,cohere_command_beta_6.1b,0.375,quac,helm_classic_240130,[],other | |
| 424,cohere_xlarge_v20221108_52.4b,0.374,quac,helm_classic_240130,[],other | |
| 425,luminous_supreme_70b,0.37,quac,helm_classic_240130,[],other | |
| 426,vicuna_v1.3_7b,0.392,quac,helm_classic_240130,[],other | |
| 427,opt_175b,0.36,quac,helm_classic_240130,[],other | |
| 428,llama_2_7b,0.406,quac,helm_classic_240130,[],other | |
| 429,llama_13b,0.347,quac,helm_classic_240130,[],other | |
| 430,instructpalmyra_30b,0.433,quac,helm_classic_240130,[],other | |
| 431,cohere_xlarge_v20220609_52.4b,0.361,quac,helm_classic_240130,[],other | |
| 433,davinci_175b,0.36,quac,helm_classic_240130,[],other | |
| 434,llama_7b,0.338,quac,helm_classic_240130,[],other | |
| 435,redpajama_incite_instruct_7b,0.26,quac,helm_classic_240130,[],other | |
| 436,j1_jumbo_v1_178b,0.358,quac,helm_classic_240130,[],other | |
| 437,glm_130b,0.272,quac,helm_classic_240130,[],other | |
| 438,luminous_extended_30b,0.349,quac,helm_classic_240130,[],other | |
| 439,opt_66b,0.357,quac,helm_classic_240130,[],other | |
| 440,bloom_176b,0.361,quac,helm_classic_240130,[],other | |
| 441,j1_grande_v1_17b,0.362,quac,helm_classic_240130,[],other | |
| 442,alpaca_7b,0.27,quac,helm_classic_240130,[],other | |
| 443,falcon_7b,0.332,quac,helm_classic_240130,[],other | |
| 444,redpajama_incite_base_7b,0.336,quac,helm_classic_240130,[],other | |
| 445,cohere_large_v20220720_13.1b,0.338,quac,helm_classic_240130,[],other | |
| 446,redpajama_incite_instruct_v1_3b,0.259,quac,helm_classic_240130,[],other | |
| 447,text_curie_001,0.358,quac,helm_classic_240130,[],other | |
| 448,gpt_neox_20b,0.326,quac,helm_classic_240130,[],other | |
| 449,luminous_base_13b,0.334,quac,helm_classic_240130,[],other | |
| 450,cohere_medium_v20221108_6.1b,0.314,quac,helm_classic_240130,[],other | |
| 451,redpajama_incite_base_v1_3b,0.309,quac,helm_classic_240130,[],other | |
| 452,tnlg_v2_6.7b,0.345,quac,helm_classic_240130,[],other | |
| 453,j1_large_v1_7.5b,0.328,quac,helm_classic_240130,[],other | |
| 454,gpt_j_6b,0.33,quac,helm_classic_240130,[],other | |
| 455,pythia_12b,0.313,quac,helm_classic_240130,[],other | |
| 456,curie_6.7b,0.321,quac,helm_classic_240130,[],other | |
| 457,falcon_instruct_7b,0.311,quac,helm_classic_240130,[],other | |
| 458,cohere_medium_v20220720_6.1b,0.279,quac,helm_classic_240130,[],other | |
| 459,text_babbage_001,0.284,quac,helm_classic_240130,[],other | |
| 460,t0pp_11b,0.121,quac,helm_classic_240130,[],other | |
| 461,pythia_6.9b,0.296,quac,helm_classic_240130,[],other | |
| 462,ul2_20b,0.144,quac,helm_classic_240130,[],other | |
| 463,t5_11b,0.116,quac,helm_classic_240130,[],other | |
| 464,babbage_1.3b,0.273,quac,helm_classic_240130,[],other | |
| 465,cohere_small_v20220720_410m,0.219,quac,helm_classic_240130,[],other | |
| 466,ada_350m,0.242,quac,helm_classic_240130,[],other | |
| 467,text_ada_001,0.176,quac,helm_classic_240130,[],other | |
| 468,yalm_100b,0.162,quac,helm_classic_240130,[],other | |
| 471,text_davinci_002,0.815,hellaswag,helm_classic_240130,[],reasoning | |
| 473,cohere_command_beta_52.4b,0.811,hellaswag,helm_classic_240130,[],reasoning | |
| 474,text_davinci_003,0.822,hellaswag,helm_classic_240130,[],reasoning | |
| 475,jurassic_2_jumbo_178b,0.788,hellaswag,helm_classic_240130,[],reasoning | |
| 477,tnlg_v2_530b,0.799,hellaswag,helm_classic_240130,[],reasoning | |
| 480,anthropic_lm_v4_s3_52b,0.807,hellaswag,helm_classic_240130,[],reasoning | |
| 482,jurassic_2_grande_17b,0.781,hellaswag,helm_classic_240130,[],reasoning | |
| 488,j1_grande_v2_beta_17b,0.764,hellaswag,helm_classic_240130,[],reasoning | |
| 490,cohere_command_beta_6.1b,0.752,hellaswag,helm_classic_240130,[],reasoning | |
| 491,cohere_xlarge_v20221108_52.4b,0.81,hellaswag,helm_classic_240130,[],reasoning | |
| 494,opt_175b,0.791,hellaswag,helm_classic_240130,[],reasoning | |
| 498,cohere_xlarge_v20220609_52.4b,0.811,hellaswag,helm_classic_240130,[],reasoning | |
| 499,jurassic_2_large_7.5b,0.729,hellaswag,helm_classic_240130,[],reasoning | |
| 500,davinci_175b,0.775,hellaswag,helm_classic_240130,[],reasoning | |
| 503,j1_jumbo_v1_178b,0.765,hellaswag,helm_classic_240130,[],reasoning | |
| 506,opt_66b,0.745,hellaswag,helm_classic_240130,[],reasoning | |
| 507,bloom_176b,0.744,hellaswag,helm_classic_240130,[],reasoning | |
| 508,j1_grande_v1_17b,0.739,hellaswag,helm_classic_240130,[],reasoning | |
| 512,cohere_large_v20220720_13.1b,0.736,hellaswag,helm_classic_240130,[],reasoning | |
| 514,text_curie_001,0.676,hellaswag,helm_classic_240130,[],reasoning | |
| 515,gpt_neox_20b,0.718,hellaswag,helm_classic_240130,[],reasoning | |
| 517,cohere_medium_v20221108_6.1b,0.726,hellaswag,helm_classic_240130,[],reasoning | |
| 519,tnlg_v2_6.7b,0.704,hellaswag,helm_classic_240130,[],reasoning | |
| 520,j1_large_v1_7.5b,0.7,hellaswag,helm_classic_240130,[],reasoning | |
| 521,gpt_j_6b,0.663,hellaswag,helm_classic_240130,[],reasoning | |
| 523,curie_6.7b,0.682,hellaswag,helm_classic_240130,[],reasoning | |
| 525,cohere_medium_v20220720_6.1b,0.706,hellaswag,helm_classic_240130,[],reasoning | |
| 526,text_babbage_001,0.561,hellaswag,helm_classic_240130,[],reasoning | |
| 531,babbage_1.3b,0.555,hellaswag,helm_classic_240130,[],reasoning | |
| 532,cohere_small_v20220720_410m,0.483,hellaswag,helm_classic_240130,[],reasoning | |
| 533,ada_350m,0.435,hellaswag,helm_classic_240130,[],reasoning | |
| 534,text_ada_001,0.429,hellaswag,helm_classic_240130,[],reasoning | |
| 538,text_davinci_002,0.594,openbookqa,helm_classic_240130,[],knowledge | |
| 540,cohere_command_beta_52.4b,0.582,openbookqa,helm_classic_240130,[],knowledge | |
| 541,text_davinci_003,0.646,openbookqa,helm_classic_240130,[],knowledge | |
| 542,jurassic_2_jumbo_178b,0.558,openbookqa,helm_classic_240130,[],knowledge | |
| 544,tnlg_v2_530b,0.562,openbookqa,helm_classic_240130,[],knowledge | |
| 547,anthropic_lm_v4_s3_52b,0.558,openbookqa,helm_classic_240130,[],knowledge | |
| 549,jurassic_2_grande_17b,0.542,openbookqa,helm_classic_240130,[],knowledge | |
| 555,j1_grande_v2_beta_17b,0.56,openbookqa,helm_classic_240130,[],knowledge | |
| 557,cohere_command_beta_6.1b,0.55,openbookqa,helm_classic_240130,[],knowledge | |
| 558,cohere_xlarge_v20221108_52.4b,0.588,openbookqa,helm_classic_240130,[],knowledge | |
| 561,opt_175b,0.586,openbookqa,helm_classic_240130,[],knowledge | |
| 565,cohere_xlarge_v20220609_52.4b,0.55,openbookqa,helm_classic_240130,[],knowledge | |
| 566,jurassic_2_large_7.5b,0.53,openbookqa,helm_classic_240130,[],knowledge | |
| 567,davinci_175b,0.586,openbookqa,helm_classic_240130,[],knowledge | |
| 570,j1_jumbo_v1_178b,0.534,openbookqa,helm_classic_240130,[],knowledge | |
| 573,opt_66b,0.534,openbookqa,helm_classic_240130,[],knowledge | |
| 574,bloom_176b,0.534,openbookqa,helm_classic_240130,[],knowledge | |
| 575,j1_grande_v1_17b,0.52,openbookqa,helm_classic_240130,[],knowledge | |
| 579,cohere_large_v20220720_13.1b,0.542,openbookqa,helm_classic_240130,[],knowledge | |
| 581,text_curie_001,0.514,openbookqa,helm_classic_240130,[],knowledge | |
| 582,gpt_neox_20b,0.524,openbookqa,helm_classic_240130,[],knowledge | |
| 584,cohere_medium_v20221108_6.1b,0.538,openbookqa,helm_classic_240130,[],knowledge | |
| 586,tnlg_v2_6.7b,0.478,openbookqa,helm_classic_240130,[],knowledge | |
| 587,j1_large_v1_7.5b,0.514,openbookqa,helm_classic_240130,[],knowledge | |
| 588,gpt_j_6b,0.514,openbookqa,helm_classic_240130,[],knowledge | |
| 590,curie_6.7b,0.502,openbookqa,helm_classic_240130,[],knowledge | |
| 592,cohere_medium_v20220720_6.1b,0.496,openbookqa,helm_classic_240130,[],knowledge | |
| 593,text_babbage_001,0.452,openbookqa,helm_classic_240130,[],knowledge | |
| 598,babbage_1.3b,0.438,openbookqa,helm_classic_240130,[],knowledge | |
| 599,cohere_small_v20220720_410m,0.348,openbookqa,helm_classic_240130,[],knowledge | |
| 600,ada_350m,0.38,openbookqa,helm_classic_240130,[],knowledge | |
| 601,text_ada_001,0.346,openbookqa,helm_classic_240130,[],knowledge | |
| 603,llama_2_70b,0.554,truthfulqa,helm_classic_240130,[],knowledge | |
| 604,llama_65b,0.508,truthfulqa,helm_classic_240130,[],knowledge | |
| 605,text_davinci_002,0.61,truthfulqa,helm_classic_240130,[],knowledge | |
| 606,mistral_v0.1_7b,0.422,truthfulqa,helm_classic_240130,[],knowledge | |
| 607,cohere_command_beta_52.4b,0.269,truthfulqa,helm_classic_240130,[],knowledge | |
| 608,text_davinci_003,0.593,truthfulqa,helm_classic_240130,[],knowledge | |
| 609,jurassic_2_jumbo_178b,0.437,truthfulqa,helm_classic_240130,[],knowledge | |
| 610,llama_2_13b,0.33,truthfulqa,helm_classic_240130,[],knowledge | |
| 611,tnlg_v2_530b,0.251,truthfulqa,helm_classic_240130,[],knowledge | |
| 612,gpt_3.5_turbo_0613,0.339,truthfulqa,helm_classic_240130,[],knowledge | |
| 613,llama_30b,0.344,truthfulqa,helm_classic_240130,[],knowledge | |
| 614,anthropic_lm_v4_s3_52b,0.368,truthfulqa,helm_classic_240130,[],knowledge | |
| 615,gpt_3.5_turbo_0301,0.609,truthfulqa,helm_classic_240130,[],knowledge | |
| 616,jurassic_2_grande_17b,0.348,truthfulqa,helm_classic_240130,[],knowledge | |
| 617,palmyra_x_43b,0.616,truthfulqa,helm_classic_240130,[],knowledge | |
| 618,falcon_40b,0.353,truthfulqa,helm_classic_240130,[],knowledge | |
| 619,falcon_instruct_40b,0.384,truthfulqa,helm_classic_240130,[],knowledge | |
| 620,mpt_instruct_30b,0.234,truthfulqa,helm_classic_240130,[],knowledge | |
| 621,mpt_30b,0.231,truthfulqa,helm_classic_240130,[],knowledge | |
| 622,j1_grande_v2_beta_17b,0.306,truthfulqa,helm_classic_240130,[],knowledge | |
| 623,vicuna_v1.3_13b,0.385,truthfulqa,helm_classic_240130,[],knowledge | |
| 624,cohere_command_beta_6.1b,0.203,truthfulqa,helm_classic_240130,[],knowledge | |
| 625,cohere_xlarge_v20221108_52.4b,0.169,truthfulqa,helm_classic_240130,[],knowledge | |
| 626,luminous_supreme_70b,0.222,truthfulqa,helm_classic_240130,[],knowledge | |
| 627,vicuna_v1.3_7b,0.292,truthfulqa,helm_classic_240130,[],knowledge | |
| 628,opt_175b,0.25,truthfulqa,helm_classic_240130,[],knowledge | |
| 629,llama_2_7b,0.272,truthfulqa,helm_classic_240130,[],knowledge | |
| 630,llama_13b,0.324,truthfulqa,helm_classic_240130,[],knowledge | |
| 631,instructpalmyra_30b,0.185,truthfulqa,helm_classic_240130,[],knowledge | |
| 632,cohere_xlarge_v20220609_52.4b,0.198,truthfulqa,helm_classic_240130,[],knowledge | |
| 633,jurassic_2_large_7.5b,0.245,truthfulqa,helm_classic_240130,[],knowledge | |
| 634,davinci_175b,0.194,truthfulqa,helm_classic_240130,[],knowledge | |
| 635,llama_7b,0.28,truthfulqa,helm_classic_240130,[],knowledge | |
| 636,redpajama_incite_instruct_7b,0.243,truthfulqa,helm_classic_240130,[],knowledge | |
| 637,j1_jumbo_v1_178b,0.175,truthfulqa,helm_classic_240130,[],knowledge | |
| 638,glm_130b,0.218,truthfulqa,helm_classic_240130,[],knowledge | |
| 639,luminous_extended_30b,0.221,truthfulqa,helm_classic_240130,[],knowledge | |
| 640,opt_66b,0.201,truthfulqa,helm_classic_240130,[],knowledge | |
| 641,bloom_176b,0.205,truthfulqa,helm_classic_240130,[],knowledge | |
| 642,j1_grande_v1_17b,0.193,truthfulqa,helm_classic_240130,[],knowledge | |
| 643,alpaca_7b,0.243,truthfulqa,helm_classic_240130,[],knowledge | |
| 644,falcon_7b,0.234,truthfulqa,helm_classic_240130,[],knowledge | |
| 645,redpajama_incite_base_7b,0.205,truthfulqa,helm_classic_240130,[],knowledge | |
| 646,cohere_large_v20220720_13.1b,0.181,truthfulqa,helm_classic_240130,[],knowledge | |
| 647,redpajama_incite_instruct_v1_3b,0.208,truthfulqa,helm_classic_240130,[],knowledge | |
| 648,text_curie_001,0.257,truthfulqa,helm_classic_240130,[],knowledge | |
| 649,gpt_neox_20b,0.216,truthfulqa,helm_classic_240130,[],knowledge | |
| 650,luminous_base_13b,0.182,truthfulqa,helm_classic_240130,[],knowledge | |
| 651,cohere_medium_v20221108_6.1b,0.215,truthfulqa,helm_classic_240130,[],knowledge | |
| 652,redpajama_incite_base_v1_3b,0.277,truthfulqa,helm_classic_240130,[],knowledge | |
| 653,tnlg_v2_6.7b,0.167,truthfulqa,helm_classic_240130,[],knowledge | |
| 654,j1_large_v1_7.5b,0.197,truthfulqa,helm_classic_240130,[],knowledge | |
| 655,gpt_j_6b,0.199,truthfulqa,helm_classic_240130,[],knowledge | |
| 656,pythia_12b,0.177,truthfulqa,helm_classic_240130,[],knowledge | |
| 657,curie_6.7b,0.232,truthfulqa,helm_classic_240130,[],knowledge | |
| 658,falcon_instruct_7b,0.213,truthfulqa,helm_classic_240130,[],knowledge | |
| 659,cohere_medium_v20220720_6.1b,0.19,truthfulqa,helm_classic_240130,[],knowledge | |
| 660,text_babbage_001,0.233,truthfulqa,helm_classic_240130,[],knowledge | |
| 661,t0pp_11b,0.377,truthfulqa,helm_classic_240130,[],knowledge | |
| 662,pythia_6.9b,0.213,truthfulqa,helm_classic_240130,[],knowledge | |
| 663,ul2_20b,0.193,truthfulqa,helm_classic_240130,[],knowledge | |
| 664,t5_11b,0.133,truthfulqa,helm_classic_240130,[],knowledge | |
| 665,babbage_1.3b,0.188,truthfulqa,helm_classic_240130,[],knowledge | |
| 666,cohere_small_v20220720_410m,0.217,truthfulqa,helm_classic_240130,[],knowledge | |
| 667,ada_350m,0.215,truthfulqa,helm_classic_240130,[],knowledge | |
| 668,text_ada_001,0.232,truthfulqa,helm_classic_240130,[],knowledge | |
| 669,yalm_100b,0.202,truthfulqa,helm_classic_240130,[],knowledge | |
| 672,text_davinci_002,0.421,ms_marco_regular,helm_classic_240130,[],other | |
| 674,cohere_command_beta_52.4b,0.472,ms_marco_regular,helm_classic_240130,[],other | |
| 675,text_davinci_003,0.368,ms_marco_regular,helm_classic_240130,[],other | |
| 676,jurassic_2_jumbo_178b,0.398,ms_marco_regular,helm_classic_240130,[],other | |
| 678,tnlg_v2_530b,0.377,ms_marco_regular,helm_classic_240130,[],other | |
| 683,jurassic_2_grande_17b,0.293,ms_marco_regular,helm_classic_240130,[],other | |
| 689,j1_grande_v2_beta_17b,0.285,ms_marco_regular,helm_classic_240130,[],other | |
| 691,cohere_command_beta_6.1b,0.434,ms_marco_regular,helm_classic_240130,[],other | |
| 692,cohere_xlarge_v20221108_52.4b,0.315,ms_marco_regular,helm_classic_240130,[],other | |
| 695,opt_175b,0.288,ms_marco_regular,helm_classic_240130,[],other | |
| 699,cohere_xlarge_v20220609_52.4b,0.273,ms_marco_regular,helm_classic_240130,[],other | |
| 700,jurassic_2_large_7.5b,0.247,ms_marco_regular,helm_classic_240130,[],other | |
| 701,davinci_175b,0.211,ms_marco_regular,helm_classic_240130,[],other | |
| 704,j1_jumbo_v1_178b,0.21,ms_marco_regular,helm_classic_240130,[],other | |
| 707,opt_66b,0.237,ms_marco_regular,helm_classic_240130,[],other | |
| 708,bloom_176b,0.236,ms_marco_regular,helm_classic_240130,[],other | |
| 709,j1_grande_v1_17b,0.161,ms_marco_regular,helm_classic_240130,[],other | |
| 713,cohere_large_v20220720_13.1b,0.19,ms_marco_regular,helm_classic_240130,[],other | |
| 715,text_curie_001,0.271,ms_marco_regular,helm_classic_240130,[],other | |
| 716,gpt_neox_20b,0.184,ms_marco_regular,helm_classic_240130,[],other | |
| 718,cohere_medium_v20221108_6.1b,0.175,ms_marco_regular,helm_classic_240130,[],other | |
| 720,tnlg_v2_6.7b,0.158,ms_marco_regular,helm_classic_240130,[],other | |
| 721,j1_large_v1_7.5b,0.147,ms_marco_regular,helm_classic_240130,[],other | |
| 722,gpt_j_6b,0.152,ms_marco_regular,helm_classic_240130,[],other | |
| 724,curie_6.7b,0.162,ms_marco_regular,helm_classic_240130,[],other | |
| 726,cohere_medium_v20220720_6.1b,0.152,ms_marco_regular,helm_classic_240130,[],other | |
| 727,text_babbage_001,0.208,ms_marco_regular,helm_classic_240130,[],other | |
| 732,babbage_1.3b,0.122,ms_marco_regular,helm_classic_240130,[],other | |
| 734,ada_350m,0.102,ms_marco_regular,helm_classic_240130,[],other | |
| 735,text_ada_001,0.134,ms_marco_regular,helm_classic_240130,[],other | |
| 739,text_davinci_002,0.664,ms_marco_trec,helm_classic_240130,[],other | |
| 741,cohere_command_beta_52.4b,0.762,ms_marco_trec,helm_classic_240130,[],other | |
| 742,text_davinci_003,0.644,ms_marco_trec,helm_classic_240130,[],other | |
| 743,jurassic_2_jumbo_178b,0.661,ms_marco_trec,helm_classic_240130,[],other | |
| 745,tnlg_v2_530b,0.643,ms_marco_trec,helm_classic_240130,[],other | |
| 750,jurassic_2_grande_17b,0.514,ms_marco_trec,helm_classic_240130,[],other | |
| 756,j1_grande_v2_beta_17b,0.46,ms_marco_trec,helm_classic_240130,[],other | |
| 758,cohere_command_beta_6.1b,0.709,ms_marco_trec,helm_classic_240130,[],other | |
| 759,cohere_xlarge_v20221108_52.4b,0.55,ms_marco_trec,helm_classic_240130,[],other | |
| 762,opt_175b,0.448,ms_marco_trec,helm_classic_240130,[],other | |
| 766,cohere_xlarge_v20220609_52.4b,0.459,ms_marco_trec,helm_classic_240130,[],other | |
| 767,jurassic_2_large_7.5b,0.464,ms_marco_trec,helm_classic_240130,[],other | |
| 768,davinci_175b,0.378,ms_marco_trec,helm_classic_240130,[],other | |
| 771,j1_jumbo_v1_178b,0.363,ms_marco_trec,helm_classic_240130,[],other | |
| 774,opt_66b,0.482,ms_marco_trec,helm_classic_240130,[],other | |
| 775,bloom_176b,0.386,ms_marco_trec,helm_classic_240130,[],other | |
| 776,j1_grande_v1_17b,0.341,ms_marco_trec,helm_classic_240130,[],other | |
| 780,cohere_large_v20220720_13.1b,0.33,ms_marco_trec,helm_classic_240130,[],other | |
| 782,text_curie_001,0.507,ms_marco_trec,helm_classic_240130,[],other | |
| 783,gpt_neox_20b,0.398,ms_marco_trec,helm_classic_240130,[],other | |
| 785,cohere_medium_v20221108_6.1b,0.373,ms_marco_trec,helm_classic_240130,[],other | |
| 787,tnlg_v2_6.7b,0.332,ms_marco_trec,helm_classic_240130,[],other | |
| 788,j1_large_v1_7.5b,0.292,ms_marco_trec,helm_classic_240130,[],other | |
| 789,gpt_j_6b,0.345,ms_marco_trec,helm_classic_240130,[],other | |
| 791,curie_6.7b,0.3,ms_marco_trec,helm_classic_240130,[],other | |
| 793,cohere_medium_v20220720_6.1b,0.374,ms_marco_trec,helm_classic_240130,[],other | |
| 794,text_babbage_001,0.449,ms_marco_trec,helm_classic_240130,[],other | |
| 799,babbage_1.3b,0.317,ms_marco_trec,helm_classic_240130,[],other | |
| 800,cohere_small_v20220720_410m,0.304,ms_marco_trec,helm_classic_240130,[],other | |
| 801,ada_350m,0.29,ms_marco_trec,helm_classic_240130,[],other | |
| 802,text_ada_001,0.302,ms_marco_trec,helm_classic_240130,[],other | |
| 806,text_davinci_002,0.153,cnn/dailymail,helm_classic_240130,[],other | |
| 808,cohere_command_beta_52.4b,0.161,cnn/dailymail,helm_classic_240130,[],other | |
| 809,text_davinci_003,0.156,cnn/dailymail,helm_classic_240130,[],other | |
| 810,jurassic_2_jumbo_178b,0.149,cnn/dailymail,helm_classic_240130,[],other | |
| 812,tnlg_v2_530b,0.161,cnn/dailymail,helm_classic_240130,[],other | |
| 815,anthropic_lm_v4_s3_52b,0.154,cnn/dailymail,helm_classic_240130,[],other | |
| 817,jurassic_2_grande_17b,0.144,cnn/dailymail,helm_classic_240130,[],other | |
| 818,palmyra_x_43b,0.049,cnn/dailymail,helm_classic_240130,[],other | |
| 823,j1_grande_v2_beta_17b,0.146,cnn/dailymail,helm_classic_240130,[],other | |
| 825,cohere_command_beta_6.1b,0.153,cnn/dailymail,helm_classic_240130,[],other | |
| 826,cohere_xlarge_v20221108_52.4b,0.153,cnn/dailymail,helm_classic_240130,[],other | |
| 827,luminous_supreme_70b,0.15,cnn/dailymail,helm_classic_240130,[],other | |
| 829,opt_175b,0.146,cnn/dailymail,helm_classic_240130,[],other | |
| 832,instructpalmyra_30b,0.152,cnn/dailymail,helm_classic_240130,[],other | |
| 833,cohere_xlarge_v20220609_52.4b,0.144,cnn/dailymail,helm_classic_240130,[],other | |
| 834,jurassic_2_large_7.5b,0.136,cnn/dailymail,helm_classic_240130,[],other | |
| 835,davinci_175b,0.127,cnn/dailymail,helm_classic_240130,[],other | |
| 838,j1_jumbo_v1_178b,0.144,cnn/dailymail,helm_classic_240130,[],other | |
| 839,glm_130b,0.154,cnn/dailymail,helm_classic_240130,[],other | |
| 840,luminous_extended_30b,0.139,cnn/dailymail,helm_classic_240130,[],other | |
| 841,opt_66b,0.136,cnn/dailymail,helm_classic_240130,[],other | |
| 842,bloom_176b,0.08,cnn/dailymail,helm_classic_240130,[],other | |
| 843,j1_grande_v1_17b,0.143,cnn/dailymail,helm_classic_240130,[],other | |
| 847,cohere_large_v20220720_13.1b,0.126,cnn/dailymail,helm_classic_240130,[],other | |
| 849,text_curie_001,0.152,cnn/dailymail,helm_classic_240130,[],other | |
| 850,gpt_neox_20b,0.123,cnn/dailymail,helm_classic_240130,[],other | |
| 851,luminous_base_13b,0.11,cnn/dailymail,helm_classic_240130,[],other | |
| 852,cohere_medium_v20221108_6.1b,0.121,cnn/dailymail,helm_classic_240130,[],other | |
| 854,tnlg_v2_6.7b,0.146,cnn/dailymail,helm_classic_240130,[],other | |
| 855,j1_large_v1_7.5b,0.134,cnn/dailymail,helm_classic_240130,[],other | |
| 856,gpt_j_6b,0.131,cnn/dailymail,helm_classic_240130,[],other | |
| 858,curie_6.7b,0.113,cnn/dailymail,helm_classic_240130,[],other | |
| 860,cohere_medium_v20220720_6.1b,0.077,cnn/dailymail,helm_classic_240130,[],other | |
| 861,text_babbage_001,0.151,cnn/dailymail,helm_classic_240130,[],other | |
| 862,t0pp_11b,0.122,cnn/dailymail,helm_classic_240130,[],other | |
| 864,ul2_20b,0.03,cnn/dailymail,helm_classic_240130,[],other | |
| 865,t5_11b,0.043,cnn/dailymail,helm_classic_240130,[],other | |
| 866,babbage_1.3b,0.079,cnn/dailymail,helm_classic_240130,[],other | |
| 867,cohere_small_v20220720_410m,0.063,cnn/dailymail,helm_classic_240130,[],other | |
| 868,ada_350m,0.09,cnn/dailymail,helm_classic_240130,[],other | |
| 869,text_ada_001,0.136,cnn/dailymail,helm_classic_240130,[],other | |
| 870,yalm_100b,0.017,cnn/dailymail,helm_classic_240130,[],other | |
| 873,text_davinci_002,0.144,xsum,helm_classic_240130,[],other | |
| 875,cohere_command_beta_52.4b,0.152,xsum,helm_classic_240130,[],other | |
| 876,text_davinci_003,0.124,xsum,helm_classic_240130,[],other | |
| 877,jurassic_2_jumbo_178b,0.182,xsum,helm_classic_240130,[],other | |
| 879,tnlg_v2_530b,0.169,xsum,helm_classic_240130,[],other | |
| 882,anthropic_lm_v4_s3_52b,0.134,xsum,helm_classic_240130,[],other | |
| 884,jurassic_2_grande_17b,0.167,xsum,helm_classic_240130,[],other | |
| 885,palmyra_x_43b,0.149,xsum,helm_classic_240130,[],other | |
| 890,j1_grande_v2_beta_17b,0.152,xsum,helm_classic_240130,[],other | |
| 892,cohere_command_beta_6.1b,0.122,xsum,helm_classic_240130,[],other | |
| 893,cohere_xlarge_v20221108_52.4b,0.153,xsum,helm_classic_240130,[],other | |
| 894,luminous_supreme_70b,0.136,xsum,helm_classic_240130,[],other | |
| 896,opt_175b,0.155,xsum,helm_classic_240130,[],other | |
| 899,instructpalmyra_30b,0.104,xsum,helm_classic_240130,[],other | |
| 900,cohere_xlarge_v20220609_52.4b,0.129,xsum,helm_classic_240130,[],other | |
| 901,jurassic_2_large_7.5b,0.142,xsum,helm_classic_240130,[],other | |
| 902,davinci_175b,0.126,xsum,helm_classic_240130,[],other | |
| 905,j1_jumbo_v1_178b,0.129,xsum,helm_classic_240130,[],other | |
| 906,glm_130b,0.132,xsum,helm_classic_240130,[],other | |
| 907,luminous_extended_30b,0.124,xsum,helm_classic_240130,[],other | |
| 908,opt_66b,0.126,xsum,helm_classic_240130,[],other | |
| 909,bloom_176b,0.03,xsum,helm_classic_240130,[],other | |
| 910,j1_grande_v1_17b,0.122,xsum,helm_classic_240130,[],other | |
| 914,cohere_large_v20220720_13.1b,0.108,xsum,helm_classic_240130,[],other | |
| 916,text_curie_001,0.076,xsum,helm_classic_240130,[],other | |
| 917,gpt_neox_20b,0.102,xsum,helm_classic_240130,[],other | |
| 918,luminous_base_13b,0.105,xsum,helm_classic_240130,[],other | |
| 919,cohere_medium_v20221108_6.1b,0.099,xsum,helm_classic_240130,[],other | |
| 921,tnlg_v2_6.7b,0.11,xsum,helm_classic_240130,[],other | |
| 922,j1_large_v1_7.5b,0.102,xsum,helm_classic_240130,[],other | |
| 923,gpt_j_6b,0.096,xsum,helm_classic_240130,[],other | |
| 925,curie_6.7b,0.091,xsum,helm_classic_240130,[],other | |
| 927,cohere_medium_v20220720_6.1b,0.087,xsum,helm_classic_240130,[],other | |
| 928,text_babbage_001,0.046,xsum,helm_classic_240130,[],other | |
| 929,t0pp_11b,0.09,xsum,helm_classic_240130,[],other | |
| 931,ul2_20b,0.058,xsum,helm_classic_240130,[],other | |
| 932,t5_11b,0.015,xsum,helm_classic_240130,[],other | |
| 933,babbage_1.3b,0.045,xsum,helm_classic_240130,[],other | |
| 934,cohere_small_v20220720_410m,0.033,xsum,helm_classic_240130,[],other | |
| 935,ada_350m,0.022,xsum,helm_classic_240130,[],other | |
| 936,text_ada_001,0.034,xsum,helm_classic_240130,[],other | |
| 937,yalm_100b,0.021,xsum,helm_classic_240130,[],other | |
| 938,llama_2_70b,0.961,imdb,helm_classic_240130,[],other | |
| 939,llama_65b,0.962,imdb,helm_classic_240130,[],other | |
| 940,text_davinci_002,0.948,imdb,helm_classic_240130,[],other | |
| 941,mistral_v0.1_7b,0.962,imdb,helm_classic_240130,[],other | |
| 942,cohere_command_beta_52.4b,0.96,imdb,helm_classic_240130,[],other | |
| 943,text_davinci_003,0.848,imdb,helm_classic_240130,[],other | |
| 944,jurassic_2_jumbo_178b,0.938,imdb,helm_classic_240130,[],other | |
| 945,llama_2_13b,0.962,imdb,helm_classic_240130,[],other | |
| 946,tnlg_v2_530b,0.941,imdb,helm_classic_240130,[],other | |
| 947,gpt_3.5_turbo_0613,0.943,imdb,helm_classic_240130,[],other | |
| 948,llama_30b,0.927,imdb,helm_classic_240130,[],other | |
| 949,anthropic_lm_v4_s3_52b,0.934,imdb,helm_classic_240130,[],other | |
| 950,gpt_3.5_turbo_0301,0.899,imdb,helm_classic_240130,[],other | |
| 951,jurassic_2_grande_17b,0.938,imdb,helm_classic_240130,[],other | |
| 952,palmyra_x_43b,0.935,imdb,helm_classic_240130,[],other | |
| 953,falcon_40b,0.959,imdb,helm_classic_240130,[],other | |
| 954,falcon_instruct_40b,0.959,imdb,helm_classic_240130,[],other | |
| 955,mpt_instruct_30b,0.956,imdb,helm_classic_240130,[],other | |
| 956,mpt_30b,0.959,imdb,helm_classic_240130,[],other | |
| 957,j1_grande_v2_beta_17b,0.957,imdb,helm_classic_240130,[],other | |
| 958,vicuna_v1.3_13b,0.762,imdb,helm_classic_240130,[],other | |
| 959,cohere_command_beta_6.1b,0.961,imdb,helm_classic_240130,[],other | |
| 960,cohere_xlarge_v20221108_52.4b,0.956,imdb,helm_classic_240130,[],other | |
| 961,luminous_supreme_70b,0.959,imdb,helm_classic_240130,[],other | |
| 962,vicuna_v1.3_7b,0.916,imdb,helm_classic_240130,[],other | |
| 963,opt_175b,0.947,imdb,helm_classic_240130,[],other | |
| 964,llama_2_7b,0.907,imdb,helm_classic_240130,[],other | |
| 965,llama_13b,0.928,imdb,helm_classic_240130,[],other | |
| 966,instructpalmyra_30b,0.94,imdb,helm_classic_240130,[],other | |
| 967,cohere_xlarge_v20220609_52.4b,0.956,imdb,helm_classic_240130,[],other | |
| 968,jurassic_2_large_7.5b,0.956,imdb,helm_classic_240130,[],other | |
| 969,davinci_175b,0.933,imdb,helm_classic_240130,[],other | |
| 970,llama_7b,0.947,imdb,helm_classic_240130,[],other | |
| 971,redpajama_incite_instruct_7b,0.927,imdb,helm_classic_240130,[],other | |
| 972,j1_jumbo_v1_178b,0.943,imdb,helm_classic_240130,[],other | |
| 973,glm_130b,0.955,imdb,helm_classic_240130,[],other | |
| 974,luminous_extended_30b,0.947,imdb,helm_classic_240130,[],other | |
| 975,opt_66b,0.917,imdb,helm_classic_240130,[],other | |
| 976,bloom_176b,0.945,imdb,helm_classic_240130,[],other | |
| 977,j1_grande_v1_17b,0.953,imdb,helm_classic_240130,[],other | |
| 978,alpaca_7b,0.738,imdb,helm_classic_240130,[],other | |
| 979,falcon_7b,0.836,imdb,helm_classic_240130,[],other | |
| 980,redpajama_incite_base_7b,0.752,imdb,helm_classic_240130,[],other | |
| 981,cohere_large_v20220720_13.1b,0.933,imdb,helm_classic_240130,[],other | |
| 982,redpajama_incite_instruct_v1_3b,0.894,imdb,helm_classic_240130,[],other | |
| 983,text_curie_001,0.923,imdb,helm_classic_240130,[],other | |
| 984,gpt_neox_20b,0.948,imdb,helm_classic_240130,[],other | |
| 985,luminous_base_13b,0.939,imdb,helm_classic_240130,[],other | |
| 986,cohere_medium_v20221108_6.1b,0.935,imdb,helm_classic_240130,[],other | |
| 987,redpajama_incite_base_v1_3b,0.907,imdb,helm_classic_240130,[],other | |
| 988,tnlg_v2_6.7b,0.927,imdb,helm_classic_240130,[],other | |
| 989,j1_large_v1_7.5b,0.956,imdb,helm_classic_240130,[],other | |
| 990,gpt_j_6b,0.939,imdb,helm_classic_240130,[],other | |
| 991,pythia_12b,0.931,imdb,helm_classic_240130,[],other | |
| 992,curie_6.7b,0.889,imdb,helm_classic_240130,[],other | |
| 993,falcon_instruct_7b,0.852,imdb,helm_classic_240130,[],other | |
| 994,cohere_medium_v20220720_6.1b,0.935,imdb,helm_classic_240130,[],other | |
| 995,text_babbage_001,0.913,imdb,helm_classic_240130,[],other | |
| 996,t0pp_11b,0.207,imdb,helm_classic_240130,[],other | |
| 997,pythia_6.9b,0.928,imdb,helm_classic_240130,[],other | |
| 998,ul2_20b,0.337,imdb,helm_classic_240130,[],other | |
| 999,t5_11b,0.379,imdb,helm_classic_240130,[],other | |
| 1000,babbage_1.3b,0.597,imdb,helm_classic_240130,[],other | |
| 1001,cohere_small_v20220720_410m,0.578,imdb,helm_classic_240130,[],other | |
| 1002,ada_350m,0.849,imdb,helm_classic_240130,[],other | |
| 1003,text_ada_001,0.822,imdb,helm_classic_240130,[],other | |
| 1004,yalm_100b,0.836,imdb,helm_classic_240130,[],other | |
| 1005,llama_2_70b,0.652,civilcomments,helm_classic_240130,[],other | |
| 1006,llama_65b,0.655,civilcomments,helm_classic_240130,[],other | |
| 1007,text_davinci_002,0.668,civilcomments,helm_classic_240130,[],other | |
| 1008,mistral_v0.1_7b,0.624,civilcomments,helm_classic_240130,[],other | |
| 1009,cohere_command_beta_52.4b,0.601,civilcomments,helm_classic_240130,[],other | |
| 1010,text_davinci_003,0.684,civilcomments,helm_classic_240130,[],other | |
| 1011,jurassic_2_jumbo_178b,0.57,civilcomments,helm_classic_240130,[],other | |
| 1012,llama_2_13b,0.588,civilcomments,helm_classic_240130,[],other | |
| 1013,tnlg_v2_530b,0.601,civilcomments,helm_classic_240130,[],other | |
| 1014,gpt_3.5_turbo_0613,0.696,civilcomments,helm_classic_240130,[],other | |
| 1015,llama_30b,0.549,civilcomments,helm_classic_240130,[],other | |
| 1016,anthropic_lm_v4_s3_52b,0.61,civilcomments,helm_classic_240130,[],other | |
| 1017,gpt_3.5_turbo_0301,0.674,civilcomments,helm_classic_240130,[],other | |
| 1018,jurassic_2_grande_17b,0.547,civilcomments,helm_classic_240130,[],other | |
| 1019,palmyra_x_43b,0.008,civilcomments,helm_classic_240130,[],other | |
| 1020,falcon_40b,0.552,civilcomments,helm_classic_240130,[],other | |
| 1021,falcon_instruct_40b,0.603,civilcomments,helm_classic_240130,[],other | |
| 1022,mpt_instruct_30b,0.573,civilcomments,helm_classic_240130,[],other | |
| 1023,mpt_30b,0.599,civilcomments,helm_classic_240130,[],other | |
| 1024,j1_grande_v2_beta_17b,0.546,civilcomments,helm_classic_240130,[],other | |
| 1025,vicuna_v1.3_13b,0.645,civilcomments,helm_classic_240130,[],other | |
| 1026,cohere_command_beta_6.1b,0.54,civilcomments,helm_classic_240130,[],other | |
| 1027,cohere_xlarge_v20221108_52.4b,0.524,civilcomments,helm_classic_240130,[],other | |
| 1028,luminous_supreme_70b,0.562,civilcomments,helm_classic_240130,[],other | |
| 1029,vicuna_v1.3_7b,0.62,civilcomments,helm_classic_240130,[],other | |
| 1030,opt_175b,0.505,civilcomments,helm_classic_240130,[],other | |
| 1031,llama_2_7b,0.562,civilcomments,helm_classic_240130,[],other | |
| 1032,llama_13b,0.6,civilcomments,helm_classic_240130,[],other | |
| 1033,instructpalmyra_30b,0.555,civilcomments,helm_classic_240130,[],other | |
| 1034,cohere_xlarge_v20220609_52.4b,0.532,civilcomments,helm_classic_240130,[],other | |
| 1035,jurassic_2_large_7.5b,0.57,civilcomments,helm_classic_240130,[],other | |
| 1036,davinci_175b,0.532,civilcomments,helm_classic_240130,[],other | |
| 1037,llama_7b,0.563,civilcomments,helm_classic_240130,[],other | |
| 1038,redpajama_incite_instruct_7b,0.664,civilcomments,helm_classic_240130,[],other | |
| 1039,j1_jumbo_v1_178b,0.553,civilcomments,helm_classic_240130,[],other | |
| 1040,glm_130b,0.5,civilcomments,helm_classic_240130,[],other | |
| 1041,luminous_extended_30b,0.524,civilcomments,helm_classic_240130,[],other | |
| 1042,opt_66b,0.506,civilcomments,helm_classic_240130,[],other | |
| 1043,bloom_176b,0.62,civilcomments,helm_classic_240130,[],other | |
| 1044,j1_grande_v1_17b,0.529,civilcomments,helm_classic_240130,[],other | |
| 1045,alpaca_7b,0.566,civilcomments,helm_classic_240130,[],other | |
| 1046,falcon_7b,0.514,civilcomments,helm_classic_240130,[],other | |
| 1047,redpajama_incite_base_7b,0.547,civilcomments,helm_classic_240130,[],other | |
| 1048,cohere_large_v20220720_13.1b,0.507,civilcomments,helm_classic_240130,[],other | |
| 1049,redpajama_incite_instruct_v1_3b,0.549,civilcomments,helm_classic_240130,[],other | |
| 1050,text_curie_001,0.537,civilcomments,helm_classic_240130,[],other | |
| 1051,gpt_neox_20b,0.516,civilcomments,helm_classic_240130,[],other | |
| 1052,luminous_base_13b,0.544,civilcomments,helm_classic_240130,[],other | |
| 1053,cohere_medium_v20221108_6.1b,0.5,civilcomments,helm_classic_240130,[],other | |
| 1054,redpajama_incite_base_v1_3b,0.549,civilcomments,helm_classic_240130,[],other | |
| 1055,tnlg_v2_6.7b,0.532,civilcomments,helm_classic_240130,[],other | |
| 1056,j1_large_v1_7.5b,0.532,civilcomments,helm_classic_240130,[],other | |
| 1057,gpt_j_6b,0.52,civilcomments,helm_classic_240130,[],other | |
| 1058,pythia_12b,0.531,civilcomments,helm_classic_240130,[],other | |
| 1059,curie_6.7b,0.539,civilcomments,helm_classic_240130,[],other | |
| 1060,falcon_instruct_7b,0.511,civilcomments,helm_classic_240130,[],other | |
| 1061,cohere_medium_v20220720_6.1b,0.504,civilcomments,helm_classic_240130,[],other | |
| 1062,text_babbage_001,0.499,civilcomments,helm_classic_240130,[],other | |
| 1063,t0pp_11b,0.234,civilcomments,helm_classic_240130,[],other | |
| 1064,pythia_6.9b,0.511,civilcomments,helm_classic_240130,[],other | |
| 1065,ul2_20b,0.521,civilcomments,helm_classic_240130,[],other | |
| 1066,t5_11b,0.509,civilcomments,helm_classic_240130,[],other | |
| 1067,babbage_1.3b,0.519,civilcomments,helm_classic_240130,[],other | |
| 1068,cohere_small_v20220720_410m,0.501,civilcomments,helm_classic_240130,[],other | |
| 1069,ada_350m,0.517,civilcomments,helm_classic_240130,[],other | |
| 1070,text_ada_001,0.503,civilcomments,helm_classic_240130,[],other | |
| 1071,yalm_100b,0.49,civilcomments,helm_classic_240130,[],other | |
| 1072,llama_2_70b,0.727,raft,helm_classic_240130,[],other | |
| 1073,llama_65b,0.702,raft,helm_classic_240130,[],other | |
| 1074,text_davinci_002,0.733,raft,helm_classic_240130,[],other | |
| 1075,mistral_v0.1_7b,0.707,raft,helm_classic_240130,[],other | |
| 1076,cohere_command_beta_52.4b,0.667,raft,helm_classic_240130,[],other | |
| 1077,text_davinci_003,0.759,raft,helm_classic_240130,[],other | |
| 1078,jurassic_2_jumbo_178b,0.746,raft,helm_classic_240130,[],other | |
| 1079,llama_2_13b,0.707,raft,helm_classic_240130,[],other | |
| 1080,tnlg_v2_530b,0.679,raft,helm_classic_240130,[],other | |
| 1081,gpt_3.5_turbo_0613,0.748,raft,helm_classic_240130,[],other | |
| 1082,llama_30b,0.752,raft,helm_classic_240130,[],other | |
| 1083,anthropic_lm_v4_s3_52b,0.699,raft,helm_classic_240130,[],other | |
| 1084,gpt_3.5_turbo_0301,0.768,raft,helm_classic_240130,[],other | |
| 1085,jurassic_2_grande_17b,0.712,raft,helm_classic_240130,[],other | |
| 1086,palmyra_x_43b,0.701,raft,helm_classic_240130,[],other | |
| 1087,falcon_40b,0.661,raft,helm_classic_240130,[],other | |
| 1088,falcon_instruct_40b,0.586,raft,helm_classic_240130,[],other | |
| 1089,mpt_instruct_30b,0.68,raft,helm_classic_240130,[],other | |
| 1090,mpt_30b,0.723,raft,helm_classic_240130,[],other | |
| 1091,j1_grande_v2_beta_17b,0.679,raft,helm_classic_240130,[],other | |
| 1092,vicuna_v1.3_13b,0.657,raft,helm_classic_240130,[],other | |
| 1093,cohere_command_beta_6.1b,0.634,raft,helm_classic_240130,[],other | |
| 1094,cohere_xlarge_v20221108_52.4b,0.624,raft,helm_classic_240130,[],other | |
| 1095,luminous_supreme_70b,0.653,raft,helm_classic_240130,[],other | |
| 1096,vicuna_v1.3_7b,0.693,raft,helm_classic_240130,[],other | |
| 1097,opt_175b,0.606,raft,helm_classic_240130,[],other | |
| 1098,llama_2_7b,0.643,raft,helm_classic_240130,[],other | |
| 1099,llama_13b,0.643,raft,helm_classic_240130,[],other | |
| 1100,instructpalmyra_30b,0.652,raft,helm_classic_240130,[],other | |
| 1101,cohere_xlarge_v20220609_52.4b,0.633,raft,helm_classic_240130,[],other | |
| 1102,jurassic_2_large_7.5b,0.622,raft,helm_classic_240130,[],other | |
| 1103,davinci_175b,0.642,raft,helm_classic_240130,[],other | |
| 1104,llama_7b,0.573,raft,helm_classic_240130,[],other | |
| 1105,redpajama_incite_instruct_7b,0.695,raft,helm_classic_240130,[],other | |
| 1106,j1_jumbo_v1_178b,0.681,raft,helm_classic_240130,[],other | |
| 1107,glm_130b,0.598,raft,helm_classic_240130,[],other | |
| 1108,luminous_extended_30b,0.523,raft,helm_classic_240130,[],other | |
| 1109,opt_66b,0.557,raft,helm_classic_240130,[],other | |
| 1110,bloom_176b,0.592,raft,helm_classic_240130,[],other | |
| 1111,j1_grande_v1_17b,0.658,raft,helm_classic_240130,[],other | |
| 1112,alpaca_7b,0.486,raft,helm_classic_240130,[],other | |
| 1113,falcon_7b,0.602,raft,helm_classic_240130,[],other | |
| 1114,redpajama_incite_base_7b,0.648,raft,helm_classic_240130,[],other | |
| 1115,cohere_large_v20220720_13.1b,0.596,raft,helm_classic_240130,[],other | |
| 1116,redpajama_incite_instruct_v1_3b,0.661,raft,helm_classic_240130,[],other | |
| 1117,text_curie_001,0.489,raft,helm_classic_240130,[],other | |
| 1118,gpt_neox_20b,0.505,raft,helm_classic_240130,[],other | |
| 1119,luminous_base_13b,0.473,raft,helm_classic_240130,[],other | |
| 1120,cohere_medium_v20221108_6.1b,0.591,raft,helm_classic_240130,[],other | |
| 1121,redpajama_incite_base_v1_3b,0.502,raft,helm_classic_240130,[],other | |
| 1122,tnlg_v2_6.7b,0.525,raft,helm_classic_240130,[],other | |
| 1123,j1_large_v1_7.5b,0.545,raft,helm_classic_240130,[],other | |
| 1124,gpt_j_6b,0.619,raft,helm_classic_240130,[],other | |
| 1125,pythia_12b,0.514,raft,helm_classic_240130,[],other | |
| 1126,curie_6.7b,0.49,raft,helm_classic_240130,[],other | |
| 1127,falcon_instruct_7b,0.523,raft,helm_classic_240130,[],other | |
| 1128,cohere_medium_v20220720_6.1b,0.52,raft,helm_classic_240130,[],other | |
| 1129,text_babbage_001,0.509,raft,helm_classic_240130,[],other | |
| 1130,t0pp_11b,0.118,raft,helm_classic_240130,[],other | |
| 1131,pythia_6.9b,0.502,raft,helm_classic_240130,[],other | |
| 1132,ul2_20b,0.404,raft,helm_classic_240130,[],other | |
| 1133,t5_11b,0.37,raft,helm_classic_240130,[],other | |
| 1134,babbage_1.3b,0.455,raft,helm_classic_240130,[],other | |
| 1135,cohere_small_v20220720_410m,0.492,raft,helm_classic_240130,[],other | |
| 1136,ada_350m,0.423,raft,helm_classic_240130,[],other | |
| 1137,text_ada_001,0.406,raft,helm_classic_240130,[],other | |
| 1138,yalm_100b,0.395,raft,helm_classic_240130,[],other | |
| 0,phi_1,1.1,grounding,biggen_240612,[],other | |
| 1,phi_1_5,2.425,grounding,biggen_240612,[],other | |
| 2,phi_2,3.05,grounding,biggen_240612,[],other | |
| 3,qwen1.5_0.5b,1.85,grounding,biggen_240612,[],other | |
| 4,qwen1.5_1.8b,2.425,grounding,biggen_240612,[],other | |
| 5,qwen1.5_4b,2.85,grounding,biggen_240612,[],other | |
| 6,gemma_2b,2.163,grounding,biggen_240612,[],other | |
| 7,olmo_1b,1.675,grounding,biggen_240612,[],other | |
| 8,qwen1.5_0.5b_chat,2.075,grounding,biggen_240612,[],other | |
| 9,qwen1.5_1.8b_chat,2.75,grounding,biggen_240612,[],other | |
| 10,qwen1.5_4b_chat,2.862,grounding,biggen_240612,[],other | |
| 11,phi_3_mini_4k_instruct,3.675,grounding,biggen_240612,[],other | |
| 12,phi_3_mini_128k_instruct,3.5,grounding,biggen_240612,[],other | |
| 13,gemma_2b_it,2.825,grounding,biggen_240612,[],other | |
| 14,gemma_1.1_2b_it,2.812,grounding,biggen_240612,[],other | |
| 15,gemma_7b,1.288,grounding,biggen_240612,[],other | |
| 16,mistral_7b_v0.1,3.15,grounding,biggen_240612,[],other | |
| 17,mistral_7b_v0.2,3.038,grounding,biggen_240612,[],other | |
| 18,qwen1.5_7b,2.9,grounding,biggen_240612,[],other | |
| 19,yi_6b,2.688,grounding,biggen_240612,[],other | |
| 20,llama_2_7b,2.325,grounding,biggen_240612,[],other | |
| 21,codellama_7b,1.875,grounding,biggen_240612,[],other | |
| 22,meta_llama_3_8b,3.025,grounding,biggen_240612,[],other | |
| 23,llemma_7b,2.237,grounding,biggen_240612,[],other | |
| 24,olmo_7b,2.075,grounding,biggen_240612,[],other | |
| 25,gemma_7b_it,3.212,grounding,biggen_240612,[],other | |
| 26,gemma_1.1_7b_it,3.5,grounding,biggen_240612,[],other | |
| 27,mistral_7b_instruct_v0.2,3.612,grounding,biggen_240612,[],other | |
| 28,qwen1.5_7b_chat,3.575,grounding,biggen_240612,[],other | |
| 29,yi_6b_chat,3.062,grounding,biggen_240612,[],other | |
| 30,llama_2_7b_chat,3.25,grounding,biggen_240612,[],other | |
| 31,codellama_7b_instruct,3.1,grounding,biggen_240612,[],other | |
| 32,meta_llama_3_8b_instruct,3.975,grounding,biggen_240612,[],other | |
| 33,olmo_7b_sft,2.825,grounding,biggen_240612,[],other | |
| 34,olmo_7b_instruct,2.925,grounding,biggen_240612,[],other | |
| 35,tulu_2_7b,2.788,grounding,biggen_240612,[],other | |
| 36,tulu_2_dpo_7b,3.2,grounding,biggen_240612,[],other | |
| 37,codetulu_2_7b,2.862,grounding,biggen_240612,[],other | |
| 38,orca_2_7b,2.3,grounding,biggen_240612,[],other | |
| 39,openchat_3.5_0106,3.575,grounding,biggen_240612,[],other | |
| 40,openhermes_2_mistral_7b,3.388,grounding,biggen_240612,[],other | |
| 41,openhermes_2.5_mistral_7b,3.3,grounding,biggen_240612,[],other | |
| 42,nous_hermes_2_mistral_7b_dpo,3.525,grounding,biggen_240612,[],other | |
| 43,starling_lm_7b_alpha,3.638,grounding,biggen_240612,[],other | |
| 44,starling_lm_7b_beta,3.737,grounding,biggen_240612,[],other | |
| 45,mistral_orpo_alpha,3.35,grounding,biggen_240612,[],other | |
| 46,mistral_orpo_beta,3.487,grounding,biggen_240612,[],other | |
| 47,zephyr_7b_beta,3.362,grounding,biggen_240612,[],other | |
| 48,qwen1.5_14b,3.413,grounding,biggen_240612,[],other | |
| 49,llama_2_13b,2.763,grounding,biggen_240612,[],other | |
| 50,codellama_13b,2.2,grounding,biggen_240612,[],other | |
| 51,solar_10.7b_v1.0,3.212,grounding,biggen_240612,[],other | |
| 52,qwen1.5_14b_chat,3.612,grounding,biggen_240612,[],other | |
| 53,solar_10.7b_instruct_v1.0,3.663,grounding,biggen_240612,[],other | |
| 54,aya_101,1.25,grounding,biggen_240612,[],other | |
| 55,llama_2_13b_chat,3.538,grounding,biggen_240612,[],other | |
| 56,codellama_13b_instruct,3.075,grounding,biggen_240612,[],other | |
| 57,tulu_2_13b,2.975,grounding,biggen_240612,[],other | |
| 58,tulu_2_dpo_13b,3.487,grounding,biggen_240612,[],other | |
| 59,codetulu_2_13b,3.1,grounding,biggen_240612,[],other | |
| 60,orca_2_13b,2.825,grounding,biggen_240612,[],other | |
| 61,yi_34b,3.388,grounding,biggen_240612,[],other | |
| 62,llemma_34b,2.812,grounding,biggen_240612,[],other | |
| 63,qwen1.5_32b,3.3,grounding,biggen_240612,[],other | |
| 64,codellama_34b,2.65,grounding,biggen_240612,[],other | |
| 65,mixtral_8x7b_v0.1,3.663,grounding,biggen_240612,[],other | |
| 66,yi_34b_chat,3.7,grounding,biggen_240612,[],other | |
| 67,nous_hermes_2_yi_34b,3.175,grounding,biggen_240612,[],other | |
| 68,codellama_34b_instruct,3.337,grounding,biggen_240612,[],other | |
| 69,codetulu_2_34b,3.275,grounding,biggen_240612,[],other | |
| 70,qwen1.5_32b_chat,3.712,grounding,biggen_240612,[],other | |
| 71,mixtral_8x7b_instruct_v0.1,3.862,grounding,biggen_240612,[],other | |
| 72,nous_hermes_2_mixtral_8x7b_sft,3.587,grounding,biggen_240612,[],other | |
| 73,nous_hermes_2_mixtral_8x7b_dpo,3.612,grounding,biggen_240612,[],other | |
| 74,c4ai_command_r_v01,3.688,grounding,biggen_240612,[],other | |
| 75,llama_2_70b,3.288,grounding,biggen_240612,[],other | |
| 76,codellama_70b,2.812,grounding,biggen_240612,[],other | |
| 77,mixtral_8x22b_v0.1_awq,3.475,grounding,biggen_240612,[],other | |
| 78,meta_llama_3_70b,3.263,grounding,biggen_240612,[],other | |
| 79,qwen1.5_72b,3.362,grounding,biggen_240612,[],other | |
| 80,llama_2_70b_chat,3.612,grounding,biggen_240612,[],other | |
| 81,codellama_70b_instruct,2.913,grounding,biggen_240612,[],other | |
| 82,tulu_2_dpo_70b,3.7,grounding,biggen_240612,[],other | |
| 83,c4ai_command_r_plus_gptq,3.788,grounding,biggen_240612,[],other | |
| 84,meta_llama_3_70b_instruct,4.013,grounding,biggen_240612,[],other | |
| 85,mixtral_8x22b_instruct_v0.1_awq,3.812,grounding,biggen_240612,[],other | |
| 86,zephyr_orpo_141b_a35b_v0.1_awq,3.425,grounding,biggen_240612,[],other | |
| 87,qwen1.5_72b_chat,3.938,grounding,biggen_240612,[],other | |
| 88,qwen_110b_chat,4.025,grounding,biggen_240612,[],other | |
| 89,gpt_3.5_turbo_1106,3.875,grounding,biggen_240612,[],other | |
| 90,gpt_3.5_turbo_0125,3.737,grounding,biggen_240612,[],other | |
| 91,gpt_4_1106_preview,4.237,grounding,biggen_240612,[],other | |
| 92,gpt_4_0125_preview,4.2,grounding,biggen_240612,[],other | |
| 93,gpt_4_turbo_2024_04_09,4.188,grounding,biggen_240612,[],other | |
| 94,gpt_4o_2024_05_13,4.088,grounding,biggen_240612,[],other | |
| 95,mistral_medium_hjpark,3.938,grounding,biggen_240612,[],other | |
| 96,mistral_large_hjpark,3.913,grounding,biggen_240612,[],other | |
| 97,gemini_1.0_pro,3.6,grounding,biggen_240612,[],other | |
| 98,gemini_pro_1.5,3.938,grounding,biggen_240612,[],other | |
| 99,gemini_flash_1.5,4.112,grounding,biggen_240612,[],other | |
| 100,claude_3_haiku_20240307,4.1,grounding,biggen_240612,[],other | |
| 101,claude_3_sonnet_20240229,4.05,grounding,biggen_240612,[],other | |
| 102,claude_3_opus_20240229,4.088,grounding,biggen_240612,[],other | |
| 103,phi_1,1.0,instruction_following,biggen_240612,[],other | |
| 104,phi_1_5,2.77,instruction_following,biggen_240612,[],other | |
| 105,phi_2,2.86,instruction_following,biggen_240612,[],other | |
| 106,qwen1.5_0.5b,2.06,instruction_following,biggen_240612,[],other | |
| 107,qwen1.5_1.8b,2.79,instruction_following,biggen_240612,[],other | |
| 108,qwen1.5_4b,2.82,instruction_following,biggen_240612,[],other | |
| 109,gemma_2b,2.61,instruction_following,biggen_240612,[],other | |
| 110,olmo_1b,1.7,instruction_following,biggen_240612,[],other | |
| 111,qwen1.5_0.5b_chat,2.36,instruction_following,biggen_240612,[],other | |
| 112,qwen1.5_1.8b_chat,3.09,instruction_following,biggen_240612,[],other | |
| 113,qwen1.5_4b_chat,2.99,instruction_following,biggen_240612,[],other | |
| 114,phi_3_mini_4k_instruct,3.82,instruction_following,biggen_240612,[],other | |
| 115,phi_3_mini_128k_instruct,3.66,instruction_following,biggen_240612,[],other | |
| 116,gemma_2b_it,3.12,instruction_following,biggen_240612,[],other | |
| 117,gemma_1.1_2b_it,3.21,instruction_following,biggen_240612,[],other | |
| 118,gemma_7b,1.53,instruction_following,biggen_240612,[],other | |
| 119,mistral_7b_v0.1,3.22,instruction_following,biggen_240612,[],other | |
| 120,mistral_7b_v0.2,3.31,instruction_following,biggen_240612,[],other | |
| 121,qwen1.5_7b,3.03,instruction_following,biggen_240612,[],other | |
| 122,yi_6b,2.77,instruction_following,biggen_240612,[],other | |
| 123,llama_2_7b,2.73,instruction_following,biggen_240612,[],other | |
| 124,codellama_7b,2.01,instruction_following,biggen_240612,[],other | |
| 125,meta_llama_3_8b,2.84,instruction_following,biggen_240612,[],other | |
| 126,llemma_7b,2.44,instruction_following,biggen_240612,[],other | |
| 127,olmo_7b,2.23,instruction_following,biggen_240612,[],other | |
| 128,gemma_7b_it,3.31,instruction_following,biggen_240612,[],other | |
| 129,gemma_1.1_7b_it,3.47,instruction_following,biggen_240612,[],other | |
| 130,mistral_7b_instruct_v0.2,3.74,instruction_following,biggen_240612,[],other | |
| 131,qwen1.5_7b_chat,3.83,instruction_following,biggen_240612,[],other | |
| 132,yi_6b_chat,3.5,instruction_following,biggen_240612,[],other | |
| 133,llama_2_7b_chat,3.55,instruction_following,biggen_240612,[],other | |
| 134,codellama_7b_instruct,3.26,instruction_following,biggen_240612,[],other | |
| 135,meta_llama_3_8b_instruct,3.75,instruction_following,biggen_240612,[],other | |
| 136,olmo_7b_sft,3.18,instruction_following,biggen_240612,[],other | |
| 137,olmo_7b_instruct,3.29,instruction_following,biggen_240612,[],other | |
| 138,tulu_2_7b,3.35,instruction_following,biggen_240612,[],other | |
| 139,tulu_2_dpo_7b,3.64,instruction_following,biggen_240612,[],other | |
| 140,codetulu_2_7b,3.11,instruction_following,biggen_240612,[],other | |
| 141,orca_2_7b,2.23,instruction_following,biggen_240612,[],other | |
| 142,openchat_3.5_0106,3.73,instruction_following,biggen_240612,[],other | |
| 143,openhermes_2_mistral_7b,3.53,instruction_following,biggen_240612,[],other | |
| 144,openhermes_2.5_mistral_7b,3.34,instruction_following,biggen_240612,[],other | |
| 145,nous_hermes_2_mistral_7b_dpo,3.61,instruction_following,biggen_240612,[],other | |
| 146,starling_lm_7b_alpha,3.62,instruction_following,biggen_240612,[],other | |
| 147,starling_lm_7b_beta,3.82,instruction_following,biggen_240612,[],other | |
| 148,mistral_orpo_alpha,3.53,instruction_following,biggen_240612,[],other | |
| 149,mistral_orpo_beta,3.76,instruction_following,biggen_240612,[],other | |
| 150,zephyr_7b_beta,3.69,instruction_following,biggen_240612,[],other | |
| 151,qwen1.5_14b,3.41,instruction_following,biggen_240612,[],other | |
| 152,llama_2_13b,2.99,instruction_following,biggen_240612,[],other | |
| 153,codellama_13b,2.08,instruction_following,biggen_240612,[],other | |
| 154,solar_10.7b_v1.0,3.53,instruction_following,biggen_240612,[],other | |
| 155,qwen1.5_14b_chat,3.84,instruction_following,biggen_240612,[],other | |
| 156,solar_10.7b_instruct_v1.0,3.73,instruction_following,biggen_240612,[],other | |
| 157,aya_101,1.33,instruction_following,biggen_240612,[],other | |
| 158,llama_2_13b_chat,3.72,instruction_following,biggen_240612,[],other | |
| 159,codellama_13b_instruct,3.13,instruction_following,biggen_240612,[],other | |
| 160,tulu_2_13b,3.4,instruction_following,biggen_240612,[],other | |
| 161,tulu_2_dpo_13b,3.65,instruction_following,biggen_240612,[],other | |
| 162,codetulu_2_13b,3.33,instruction_following,biggen_240612,[],other | |
| 163,orca_2_13b,2.45,instruction_following,biggen_240612,[],other | |
| 164,yi_34b,3.47,instruction_following,biggen_240612,[],other | |
| 165,llemma_34b,2.74,instruction_following,biggen_240612,[],other | |
| 166,qwen1.5_32b,3.63,instruction_following,biggen_240612,[],other | |
| 167,codellama_34b,2.49,instruction_following,biggen_240612,[],other | |
| 168,mixtral_8x7b_v0.1,3.45,instruction_following,biggen_240612,[],other | |
| 169,yi_34b_chat,3.79,instruction_following,biggen_240612,[],other | |
| 170,nous_hermes_2_yi_34b,3.65,instruction_following,biggen_240612,[],other | |
| 171,codellama_34b_instruct,3.5,instruction_following,biggen_240612,[],other | |
| 172,codetulu_2_34b,3.44,instruction_following,biggen_240612,[],other | |
| 173,qwen1.5_32b_chat,3.92,instruction_following,biggen_240612,[],other | |
| 174,mixtral_8x7b_instruct_v0.1,3.95,instruction_following,biggen_240612,[],other | |
| 175,nous_hermes_2_mixtral_8x7b_sft,3.7,instruction_following,biggen_240612,[],other | |
| 176,nous_hermes_2_mixtral_8x7b_dpo,3.83,instruction_following,biggen_240612,[],other | |
| 177,c4ai_command_r_v01,3.67,instruction_following,biggen_240612,[],other | |
| 178,llama_2_70b,3.4,instruction_following,biggen_240612,[],other | |
| 179,codellama_70b,2.46,instruction_following,biggen_240612,[],other | |
| 180,mixtral_8x22b_v0.1_awq,3.59,instruction_following,biggen_240612,[],other | |
| 181,meta_llama_3_70b,3.26,instruction_following,biggen_240612,[],other | |
| 182,qwen1.5_72b,3.5,instruction_following,biggen_240612,[],other | |
| 183,llama_2_70b_chat,3.71,instruction_following,biggen_240612,[],other | |
| 184,codellama_70b_instruct,2.53,instruction_following,biggen_240612,[],other | |
| 185,tulu_2_dpo_70b,3.79,instruction_following,biggen_240612,[],other | |
| 186,c4ai_command_r_plus_gptq,3.89,instruction_following,biggen_240612,[],other | |
| 187,meta_llama_3_70b_instruct,4.02,instruction_following,biggen_240612,[],other | |
| 188,mixtral_8x22b_instruct_v0.1_awq,3.91,instruction_following,biggen_240612,[],other | |
| 189,zephyr_orpo_141b_a35b_v0.1_awq,3.57,instruction_following,biggen_240612,[],other | |
| 190,qwen1.5_72b_chat,4.0,instruction_following,biggen_240612,[],other | |
| 191,qwen_110b_chat,3.89,instruction_following,biggen_240612,[],other | |
| 192,gpt_3.5_turbo_1106,3.73,instruction_following,biggen_240612,[],other | |
| 193,gpt_3.5_turbo_0125,3.74,instruction_following,biggen_240612,[],other | |
| 194,gpt_4_1106_preview,4.23,instruction_following,biggen_240612,[],other | |
| 195,gpt_4_0125_preview,4.12,instruction_following,biggen_240612,[],other | |
| 196,gpt_4_turbo_2024_04_09,4.04,instruction_following,biggen_240612,[],other | |
| 197,gpt_4o_2024_05_13,4.1,instruction_following,biggen_240612,[],other | |
| 198,mistral_medium_hjpark,3.88,instruction_following,biggen_240612,[],other | |
| 199,mistral_large_hjpark,3.82,instruction_following,biggen_240612,[],other | |
| 200,gemini_1.0_pro,3.67,instruction_following,biggen_240612,[],other | |
| 201,gemini_pro_1.5,3.91,instruction_following,biggen_240612,[],other | |
| 202,gemini_flash_1.5,3.78,instruction_following,biggen_240612,[],other | |
| 203,claude_3_haiku_20240307,4.0,instruction_following,biggen_240612,[],other | |
| 204,claude_3_sonnet_20240229,3.84,instruction_following,biggen_240612,[],other | |
| 205,claude_3_opus_20240229,4.0,instruction_following,biggen_240612,[],other | |
| 206,phi_1,1.0,planning,biggen_240612,[],other | |
| 207,phi_1_5,2.314,planning,biggen_240612,[],other | |
| 208,phi_2,2.6,planning,biggen_240612,[],other | |
| 209,qwen1.5_0.5b,1.471,planning,biggen_240612,[],other | |
| 210,qwen1.5_1.8b,2.214,planning,biggen_240612,[],other | |
| 211,qwen1.5_4b,2.557,planning,biggen_240612,[],other | |
| 212,gemma_2b,2.129,planning,biggen_240612,[],other | |
| 213,olmo_1b,1.343,planning,biggen_240612,[],other | |
| 214,qwen1.5_0.5b_chat,1.957,planning,biggen_240612,[],other | |
| 215,qwen1.5_1.8b_chat,2.629,planning,biggen_240612,[],other | |
| 216,qwen1.5_4b_chat,2.914,planning,biggen_240612,[],other | |
| 217,phi_3_mini_4k_instruct,3.486,planning,biggen_240612,[],other | |
| 218,phi_3_mini_128k_instruct,3.5,planning,biggen_240612,[],other | |
| 219,gemma_2b_it,3.0,planning,biggen_240612,[],other | |
| 220,gemma_1.1_2b_it,3.0,planning,biggen_240612,[],other | |
| 221,gemma_7b,1.171,planning,biggen_240612,[],other | |
| 222,mistral_7b_v0.1,3.029,planning,biggen_240612,[],other | |
| 223,mistral_7b_v0.2,2.871,planning,biggen_240612,[],other | |
| 224,qwen1.5_7b,2.814,planning,biggen_240612,[],other | |
| 225,yi_6b,2.271,planning,biggen_240612,[],other | |
| 226,llama_2_7b,2.4,planning,biggen_240612,[],other | |
| 227,codellama_7b,1.586,planning,biggen_240612,[],other | |
| 228,meta_llama_3_8b,2.414,planning,biggen_240612,[],other | |
| 229,llemma_7b,1.971,planning,biggen_240612,[],other | |
| 230,olmo_7b,1.757,planning,biggen_240612,[],other | |
| 231,gemma_7b_it,2.857,planning,biggen_240612,[],other | |
| 232,gemma_1.1_7b_it,3.143,planning,biggen_240612,[],other | |
| 233,mistral_7b_instruct_v0.2,3.7,planning,biggen_240612,[],other | |
| 234,qwen1.5_7b_chat,3.471,planning,biggen_240612,[],other | |
| 235,yi_6b_chat,3.171,planning,biggen_240612,[],other | |
| 236,llama_2_7b_chat,3.286,planning,biggen_240612,[],other | |
| 237,codellama_7b_instruct,2.914,planning,biggen_240612,[],other | |
| 238,meta_llama_3_8b_instruct,3.714,planning,biggen_240612,[],other | |
| 239,olmo_7b_sft,2.843,planning,biggen_240612,[],other | |
| 240,olmo_7b_instruct,2.986,planning,biggen_240612,[],other | |
| 241,tulu_2_7b,3.129,planning,biggen_240612,[],other | |
| 242,tulu_2_dpo_7b,3.229,planning,biggen_240612,[],other | |
| 243,codetulu_2_7b,2.929,planning,biggen_240612,[],other | |
| 244,orca_2_7b,1.3,planning,biggen_240612,[],other | |
| 245,openchat_3.5_0106,3.643,planning,biggen_240612,[],other | |
| 246,openhermes_2_mistral_7b,3.529,planning,biggen_240612,[],other | |
| 247,openhermes_2.5_mistral_7b,3.457,planning,biggen_240612,[],other | |
| 248,nous_hermes_2_mistral_7b_dpo,3.514,planning,biggen_240612,[],other | |
| 249,starling_lm_7b_alpha,3.557,planning,biggen_240612,[],other | |
| 250,starling_lm_7b_beta,3.671,planning,biggen_240612,[],other | |
| 251,mistral_orpo_alpha,3.329,planning,biggen_240612,[],other | |
| 252,mistral_orpo_beta,3.3,planning,biggen_240612,[],other | |
| 253,zephyr_7b_beta,3.571,planning,biggen_240612,[],other | |
| 254,qwen1.5_14b,2.9,planning,biggen_240612,[],other | |
| 255,llama_2_13b,2.629,planning,biggen_240612,[],other | |
| 256,codellama_13b,1.814,planning,biggen_240612,[],other | |
| 257,solar_10.7b_v1.0,3.057,planning,biggen_240612,[],other | |
| 258,qwen1.5_14b_chat,3.657,planning,biggen_240612,[],other | |
| 259,solar_10.7b_instruct_v1.0,3.614,planning,biggen_240612,[],other | |
| 260,aya_101,1.357,planning,biggen_240612,[],other | |
| 261,llama_2_13b_chat,3.4,planning,biggen_240612,[],other | |
| 262,codellama_13b_instruct,3.086,planning,biggen_240612,[],other | |
| 263,tulu_2_13b,3.371,planning,biggen_240612,[],other | |
| 264,tulu_2_dpo_13b,3.371,planning,biggen_240612,[],other | |
| 265,codetulu_2_13b,3.1,planning,biggen_240612,[],other | |
| 266,orca_2_13b,1.6,planning,biggen_240612,[],other | |
| 267,yi_34b,3.243,planning,biggen_240612,[],other | |
| 268,llemma_34b,2.529,planning,biggen_240612,[],other | |
| 269,qwen1.5_32b,3.229,planning,biggen_240612,[],other | |
| 270,codellama_34b,2.257,planning,biggen_240612,[],other | |
| 271,mixtral_8x7b_v0.1,3.286,planning,biggen_240612,[],other | |
| 272,yi_34b_chat,3.729,planning,biggen_240612,[],other | |
| 273,nous_hermes_2_yi_34b,3.543,planning,biggen_240612,[],other | |
| 274,codellama_34b_instruct,3.171,planning,biggen_240612,[],other | |
| 275,codetulu_2_34b,3.5,planning,biggen_240612,[],other | |
| 276,qwen1.5_32b_chat,3.829,planning,biggen_240612,[],other | |
| 277,mixtral_8x7b_instruct_v0.1,3.457,planning,biggen_240612,[],other | |
| 278,nous_hermes_2_mixtral_8x7b_sft,3.586,planning,biggen_240612,[],other | |
| 279,nous_hermes_2_mixtral_8x7b_dpo,3.657,planning,biggen_240612,[],other | |
| 280,c4ai_command_r_v01,3.643,planning,biggen_240612,[],other | |
| 281,llama_2_70b,3.2,planning,biggen_240612,[],other | |
| 282,codellama_70b,2.357,planning,biggen_240612,[],other | |
| 283,mixtral_8x22b_v0.1_awq,3.457,planning,biggen_240612,[],other | |
| 284,meta_llama_3_70b,2.8,planning,biggen_240612,[],other | |
| 285,qwen1.5_72b,3.186,planning,biggen_240612,[],other | |
| 286,llama_2_70b_chat,3.671,planning,biggen_240612,[],other | |
| 287,codellama_70b_instruct,2.5,planning,biggen_240612,[],other | |
| 288,tulu_2_dpo_70b,3.886,planning,biggen_240612,[],other | |
| 289,c4ai_command_r_plus_gptq,3.914,planning,biggen_240612,[],other | |
| 290,meta_llama_3_70b_instruct,3.929,planning,biggen_240612,[],other | |
| 291,mixtral_8x22b_instruct_v0.1_awq,3.729,planning,biggen_240612,[],other | |
| 292,zephyr_orpo_141b_a35b_v0.1_awq,3.8,planning,biggen_240612,[],other | |
| 293,qwen1.5_72b_chat,3.814,planning,biggen_240612,[],other | |
| 294,qwen_110b_chat,3.957,planning,biggen_240612,[],other | |
| 295,gpt_3.5_turbo_1106,3.871,planning,biggen_240612,[],other | |
| 296,gpt_3.5_turbo_0125,3.871,planning,biggen_240612,[],other | |
| 297,gpt_4_1106_preview,4.157,planning,biggen_240612,[],other | |
| 298,gpt_4_0125_preview,4.243,planning,biggen_240612,[],other | |
| 299,gpt_4_turbo_2024_04_09,4.029,planning,biggen_240612,[],other | |
| 300,gpt_4o_2024_05_13,4.086,planning,biggen_240612,[],other | |
| 301,mistral_medium_hjpark,3.914,planning,biggen_240612,[],other | |
| 302,mistral_large_hjpark,3.9,planning,biggen_240612,[],other | |
| 303,gemini_1.0_pro,3.714,planning,biggen_240612,[],other | |
| 304,gemini_pro_1.5,3.929,planning,biggen_240612,[],other | |
| 305,gemini_flash_1.5,3.771,planning,biggen_240612,[],other | |
| 306,claude_3_haiku_20240307,4.043,planning,biggen_240612,[],other | |
| 307,claude_3_sonnet_20240229,4.057,planning,biggen_240612,[],other | |
| 308,claude_3_opus_20240229,4.1,planning,biggen_240612,[],other | |
| 309,phi_1,1.0,reasoning,biggen_240612,[],reasoning | |
| 310,phi_1_5,2.13,reasoning,biggen_240612,[],reasoning | |
| 311,phi_2,2.7,reasoning,biggen_240612,[],reasoning | |
| 312,qwen1.5_0.5b,1.5,reasoning,biggen_240612,[],reasoning | |
| 313,qwen1.5_1.8b,1.83,reasoning,biggen_240612,[],reasoning | |
| 314,qwen1.5_4b,2.3,reasoning,biggen_240612,[],reasoning | |
| 315,gemma_2b,1.99,reasoning,biggen_240612,[],reasoning | |
| 316,olmo_1b,1.33,reasoning,biggen_240612,[],reasoning | |
| 317,qwen1.5_0.5b_chat,1.68,reasoning,biggen_240612,[],reasoning | |
| 318,qwen1.5_1.8b_chat,2.28,reasoning,biggen_240612,[],reasoning | |
| 319,qwen1.5_4b_chat,2.69,reasoning,biggen_240612,[],reasoning | |
| 320,phi_3_mini_4k_instruct,3.59,reasoning,biggen_240612,[],reasoning | |
| 321,phi_3_mini_128k_instruct,3.61,reasoning,biggen_240612,[],reasoning | |
| 322,gemma_2b_it,2.39,reasoning,biggen_240612,[],reasoning | |
| 323,gemma_1.1_2b_it,2.49,reasoning,biggen_240612,[],reasoning | |
| 324,gemma_7b,1.28,reasoning,biggen_240612,[],reasoning | |
| 325,mistral_7b_v0.1,2.75,reasoning,biggen_240612,[],reasoning | |
| 326,mistral_7b_v0.2,2.65,reasoning,biggen_240612,[],reasoning | |
| 327,qwen1.5_7b,2.37,reasoning,biggen_240612,[],reasoning | |
| 328,yi_6b,2.25,reasoning,biggen_240612,[],reasoning | |
| 329,llama_2_7b,2.03,reasoning,biggen_240612,[],reasoning | |
| 330,codellama_7b,1.57,reasoning,biggen_240612,[],reasoning | |
| 331,meta_llama_3_8b,2.32,reasoning,biggen_240612,[],reasoning | |
| 332,llemma_7b,2.07,reasoning,biggen_240612,[],reasoning | |
| 333,olmo_7b,1.76,reasoning,biggen_240612,[],reasoning | |
| 334,gemma_7b_it,2.88,reasoning,biggen_240612,[],reasoning | |
| 335,gemma_1.1_7b_it,3.05,reasoning,biggen_240612,[],reasoning | |
| 336,mistral_7b_instruct_v0.2,3.06,reasoning,biggen_240612,[],reasoning | |
| 337,qwen1.5_7b_chat,3.02,reasoning,biggen_240612,[],reasoning | |
| 338,yi_6b_chat,2.61,reasoning,biggen_240612,[],reasoning | |
| 339,llama_2_7b_chat,2.72,reasoning,biggen_240612,[],reasoning | |
| 340,codellama_7b_instruct,2.52,reasoning,biggen_240612,[],reasoning | |
| 341,meta_llama_3_8b_instruct,3.32,reasoning,biggen_240612,[],reasoning | |
| 342,olmo_7b_sft,2.37,reasoning,biggen_240612,[],reasoning | |
| 343,olmo_7b_instruct,2.38,reasoning,biggen_240612,[],reasoning | |
| 344,tulu_2_7b,2.57,reasoning,biggen_240612,[],reasoning | |
| 345,tulu_2_dpo_7b,2.68,reasoning,biggen_240612,[],reasoning | |
| 346,codetulu_2_7b,2.56,reasoning,biggen_240612,[],reasoning | |
| 347,orca_2_7b,1.75,reasoning,biggen_240612,[],reasoning | |
| 348,openchat_3.5_0106,3.23,reasoning,biggen_240612,[],reasoning | |
| 349,openhermes_2_mistral_7b,3.09,reasoning,biggen_240612,[],reasoning | |
| 350,openhermes_2.5_mistral_7b,3.12,reasoning,biggen_240612,[],reasoning | |
| 351,nous_hermes_2_mistral_7b_dpo,3.11,reasoning,biggen_240612,[],reasoning | |
| 352,starling_lm_7b_alpha,3.24,reasoning,biggen_240612,[],reasoning | |
| 353,starling_lm_7b_beta,3.46,reasoning,biggen_240612,[],reasoning | |
| 354,mistral_orpo_alpha,2.93,reasoning,biggen_240612,[],reasoning | |
| 355,mistral_orpo_beta,2.96,reasoning,biggen_240612,[],reasoning | |
| 356,zephyr_7b_beta,3.08,reasoning,biggen_240612,[],reasoning | |
| 357,qwen1.5_14b,2.77,reasoning,biggen_240612,[],reasoning | |
| 358,llama_2_13b,2.17,reasoning,biggen_240612,[],reasoning | |
| 359,codellama_13b,1.89,reasoning,biggen_240612,[],reasoning | |
| 360,solar_10.7b_v1.0,2.72,reasoning,biggen_240612,[],reasoning | |
| 361,qwen1.5_14b_chat,3.38,reasoning,biggen_240612,[],reasoning | |
| 362,solar_10.7b_instruct_v1.0,3.23,reasoning,biggen_240612,[],reasoning | |
| 363,aya_101,1.34,reasoning,biggen_240612,[],reasoning | |
| 364,llama_2_13b_chat,2.61,reasoning,biggen_240612,[],reasoning | |
| 365,codellama_13b_instruct,2.78,reasoning,biggen_240612,[],reasoning | |
| 366,tulu_2_13b,2.7,reasoning,biggen_240612,[],reasoning | |
| 367,tulu_2_dpo_13b,2.8,reasoning,biggen_240612,[],reasoning | |
| 368,codetulu_2_13b,2.62,reasoning,biggen_240612,[],reasoning | |
| 369,orca_2_13b,2.22,reasoning,biggen_240612,[],reasoning | |
| 370,yi_34b,3.06,reasoning,biggen_240612,[],reasoning | |
| 371,llemma_34b,2.56,reasoning,biggen_240612,[],reasoning | |
| 372,qwen1.5_32b,3.07,reasoning,biggen_240612,[],reasoning | |
| 373,codellama_34b,2.0,reasoning,biggen_240612,[],reasoning | |
| 374,mixtral_8x7b_v0.1,3.13,reasoning,biggen_240612,[],reasoning | |
| 375,yi_34b_chat,3.25,reasoning,biggen_240612,[],reasoning | |
| 376,nous_hermes_2_yi_34b,3.3,reasoning,biggen_240612,[],reasoning | |
| 377,codellama_34b_instruct,2.95,reasoning,biggen_240612,[],reasoning | |
| 378,codetulu_2_34b,2.97,reasoning,biggen_240612,[],reasoning | |
| 379,qwen1.5_32b_chat,3.47,reasoning,biggen_240612,[],reasoning | |
| 380,mixtral_8x7b_instruct_v0.1,3.58,reasoning,biggen_240612,[],reasoning | |
| 381,nous_hermes_2_mixtral_8x7b_sft,3.29,reasoning,biggen_240612,[],reasoning | |
| 382,nous_hermes_2_mixtral_8x7b_dpo,3.42,reasoning,biggen_240612,[],reasoning | |
| 383,c4ai_command_r_v01,3.25,reasoning,biggen_240612,[],reasoning | |
| 384,llama_2_70b,2.86,reasoning,biggen_240612,[],reasoning | |
| 385,codellama_70b,2.35,reasoning,biggen_240612,[],reasoning | |
| 386,mixtral_8x22b_v0.1_awq,3.48,reasoning,biggen_240612,[],reasoning | |
| 387,meta_llama_3_70b,2.88,reasoning,biggen_240612,[],reasoning | |
| 388,qwen1.5_72b,3.2,reasoning,biggen_240612,[],reasoning | |
| 389,llama_2_70b_chat,3.1,reasoning,biggen_240612,[],reasoning | |
| 390,codellama_70b_instruct,2.56,reasoning,biggen_240612,[],reasoning | |
| 391,tulu_2_dpo_70b,3.12,reasoning,biggen_240612,[],reasoning | |
| 392,c4ai_command_r_plus_gptq,3.48,reasoning,biggen_240612,[],reasoning | |
| 393,meta_llama_3_70b_instruct,3.77,reasoning,biggen_240612,[],reasoning | |
| 394,mixtral_8x22b_instruct_v0.1_awq,3.76,reasoning,biggen_240612,[],reasoning | |
| 395,zephyr_orpo_141b_a35b_v0.1_awq,3.42,reasoning,biggen_240612,[],reasoning | |
| 396,qwen1.5_72b_chat,3.65,reasoning,biggen_240612,[],reasoning | |
| 397,qwen_110b_chat,3.8,reasoning,biggen_240612,[],reasoning | |
| 398,gpt_3.5_turbo_1106,3.37,reasoning,biggen_240612,[],reasoning | |
| 399,gpt_3.5_turbo_0125,3.58,reasoning,biggen_240612,[],reasoning | |
| 400,gpt_4_1106_preview,4.15,reasoning,biggen_240612,[],reasoning | |
| 401,gpt_4_0125_preview,4.2,reasoning,biggen_240612,[],reasoning | |
| 402,gpt_4_turbo_2024_04_09,4.13,reasoning,biggen_240612,[],reasoning | |
| 403,gpt_4o_2024_05_13,4.03,reasoning,biggen_240612,[],reasoning | |
| 404,mistral_medium_hjpark,3.89,reasoning,biggen_240612,[],reasoning | |
| 405,mistral_large_hjpark,3.78,reasoning,biggen_240612,[],reasoning | |
| 406,gemini_1.0_pro,3.61,reasoning,biggen_240612,[],reasoning | |
| 407,gemini_pro_1.5,3.89,reasoning,biggen_240612,[],reasoning | |
| 408,gemini_flash_1.5,3.85,reasoning,biggen_240612,[],reasoning | |
| 409,claude_3_haiku_20240307,3.55,reasoning,biggen_240612,[],reasoning | |
| 410,claude_3_sonnet_20240229,3.82,reasoning,biggen_240612,[],reasoning | |
| 411,claude_3_opus_20240229,3.9,reasoning,biggen_240612,[],reasoning | |
| 412,phi_1,1.303,refinement,biggen_240612,[],other | |
| 413,phi_1_5,2.329,refinement,biggen_240612,[],other | |
| 414,phi_2,2.789,refinement,biggen_240612,[],other | |
| 415,qwen1.5_0.5b,1.934,refinement,biggen_240612,[],other | |
| 416,qwen1.5_1.8b,2.408,refinement,biggen_240612,[],other | |
| 417,qwen1.5_4b,2.447,refinement,biggen_240612,[],other | |
| 418,gemma_2b,1.934,refinement,biggen_240612,[],other | |
| 419,olmo_1b,1.737,refinement,biggen_240612,[],other | |
| 420,qwen1.5_0.5b_chat,1.776,refinement,biggen_240612,[],other | |
| 421,qwen1.5_1.8b_chat,2.553,refinement,biggen_240612,[],other | |
| 422,qwen1.5_4b_chat,2.579,refinement,biggen_240612,[],other | |
| 423,phi_3_mini_4k_instruct,3.763,refinement,biggen_240612,[],other | |
| 424,phi_3_mini_128k_instruct,3.539,refinement,biggen_240612,[],other | |
| 425,gemma_2b_it,2.724,refinement,biggen_240612,[],other | |
| 426,gemma_1.1_2b_it,2.947,refinement,biggen_240612,[],other | |
| 427,gemma_7b,1.474,refinement,biggen_240612,[],other | |
| 428,mistral_7b_v0.1,2.566,refinement,biggen_240612,[],other | |
| 429,mistral_7b_v0.2,2.579,refinement,biggen_240612,[],other | |
| 430,qwen1.5_7b,2.579,refinement,biggen_240612,[],other | |
| 431,yi_6b,2.434,refinement,biggen_240612,[],other | |
| 432,llama_2_7b,2.092,refinement,biggen_240612,[],other | |
| 433,codellama_7b,1.776,refinement,biggen_240612,[],other | |
| 434,meta_llama_3_8b,2.829,refinement,biggen_240612,[],other | |
| 435,llemma_7b,2.158,refinement,biggen_240612,[],other | |
| 436,olmo_7b,1.868,refinement,biggen_240612,[],other | |
| 437,gemma_7b_it,3.039,refinement,biggen_240612,[],other | |
| 438,gemma_1.1_7b_it,3.158,refinement,biggen_240612,[],other | |
| 439,mistral_7b_instruct_v0.2,3.355,refinement,biggen_240612,[],other | |
| 440,qwen1.5_7b_chat,3.132,refinement,biggen_240612,[],other | |
| 441,yi_6b_chat,2.803,refinement,biggen_240612,[],other | |
| 442,llama_2_7b_chat,2.987,refinement,biggen_240612,[],other | |
| 443,codellama_7b_instruct,2.671,refinement,biggen_240612,[],other | |
| 444,meta_llama_3_8b_instruct,3.408,refinement,biggen_240612,[],other | |
| 445,olmo_7b_sft,2.224,refinement,biggen_240612,[],other | |
| 446,olmo_7b_instruct,2.539,refinement,biggen_240612,[],other | |
| 447,tulu_2_7b,2.789,refinement,biggen_240612,[],other | |
| 448,tulu_2_dpo_7b,2.868,refinement,biggen_240612,[],other | |
| 449,codetulu_2_7b,2.763,refinement,biggen_240612,[],other | |
| 450,orca_2_7b,2.066,refinement,biggen_240612,[],other | |
| 451,openchat_3.5_0106,3.408,refinement,biggen_240612,[],other | |
| 452,openhermes_2_mistral_7b,3.079,refinement,biggen_240612,[],other | |
| 453,openhermes_2.5_mistral_7b,2.855,refinement,biggen_240612,[],other | |
| 454,nous_hermes_2_mistral_7b_dpo,3.158,refinement,biggen_240612,[],other | |
| 455,starling_lm_7b_alpha,3.092,refinement,biggen_240612,[],other | |
| 456,starling_lm_7b_beta,3.421,refinement,biggen_240612,[],other | |
| 457,mistral_orpo_alpha,3.184,refinement,biggen_240612,[],other | |
| 458,mistral_orpo_beta,2.987,refinement,biggen_240612,[],other | |
| 459,zephyr_7b_beta,3.158,refinement,biggen_240612,[],other | |
| 460,qwen1.5_14b,2.974,refinement,biggen_240612,[],other | |
| 461,llama_2_13b,2.382,refinement,biggen_240612,[],other | |
| 462,codellama_13b,1.697,refinement,biggen_240612,[],other | |
| 463,solar_10.7b_v1.0,3.092,refinement,biggen_240612,[],other | |
| 464,qwen1.5_14b_chat,3.25,refinement,biggen_240612,[],other | |
| 465,solar_10.7b_instruct_v1.0,3.289,refinement,biggen_240612,[],other | |
| 466,aya_101,1.882,refinement,biggen_240612,[],other | |
| 467,llama_2_13b_chat,3.066,refinement,biggen_240612,[],other | |
| 468,codellama_13b_instruct,2.526,refinement,biggen_240612,[],other | |
| 469,tulu_2_13b,2.803,refinement,biggen_240612,[],other | |
| 470,tulu_2_dpo_13b,3.118,refinement,biggen_240612,[],other | |
| 471,codetulu_2_13b,2.961,refinement,biggen_240612,[],other | |
| 472,orca_2_13b,2.092,refinement,biggen_240612,[],other | |
| 473,yi_34b,2.921,refinement,biggen_240612,[],other | |
| 474,llemma_34b,2.566,refinement,biggen_240612,[],other | |
| 475,qwen1.5_32b,2.921,refinement,biggen_240612,[],other | |
| 476,codellama_34b,2.289,refinement,biggen_240612,[],other | |
| 477,mixtral_8x7b_v0.1,3.013,refinement,biggen_240612,[],other | |
| 478,yi_34b_chat,3.342,refinement,biggen_240612,[],other | |
| 479,nous_hermes_2_yi_34b,3.342,refinement,biggen_240612,[],other | |
| 480,codellama_34b_instruct,2.776,refinement,biggen_240612,[],other | |
| 481,codetulu_2_34b,3.039,refinement,biggen_240612,[],other | |
| 482,qwen1.5_32b_chat,3.145,refinement,biggen_240612,[],other | |
| 483,mixtral_8x7b_instruct_v0.1,3.329,refinement,biggen_240612,[],other | |
| 484,nous_hermes_2_mixtral_8x7b_sft,3.039,refinement,biggen_240612,[],other | |
| 485,nous_hermes_2_mixtral_8x7b_dpo,3.303,refinement,biggen_240612,[],other | |
| 486,c4ai_command_r_v01,3.316,refinement,biggen_240612,[],other | |
| 487,llama_2_70b,2.895,refinement,biggen_240612,[],other | |
| 488,codellama_70b,2.408,refinement,biggen_240612,[],other | |
| 489,mixtral_8x22b_v0.1_awq,3.237,refinement,biggen_240612,[],other | |
| 490,meta_llama_3_70b,3.066,refinement,biggen_240612,[],other | |
| 491,qwen1.5_72b,3.013,refinement,biggen_240612,[],other | |
| 492,llama_2_70b_chat,3.303,refinement,biggen_240612,[],other | |
| 493,codellama_70b_instruct,2.25,refinement,biggen_240612,[],other | |
| 494,tulu_2_dpo_70b,3.382,refinement,biggen_240612,[],other | |
| 495,c4ai_command_r_plus_gptq,3.447,refinement,biggen_240612,[],other | |
| 496,meta_llama_3_70b_instruct,3.776,refinement,biggen_240612,[],other | |
| 497,mixtral_8x22b_instruct_v0.1_awq,3.684,refinement,biggen_240612,[],other | |
| 498,zephyr_orpo_141b_a35b_v0.1_awq,3.303,refinement,biggen_240612,[],other | |
| 499,qwen1.5_72b_chat,3.868,refinement,biggen_240612,[],other | |
| 500,qwen_110b_chat,3.842,refinement,biggen_240612,[],other | |
| 501,gpt_3.5_turbo_1106,3.105,refinement,biggen_240612,[],other | |
| 502,gpt_3.5_turbo_0125,3.539,refinement,biggen_240612,[],other | |
| 503,gpt_4_1106_preview,4.263,refinement,biggen_240612,[],other | |
| 504,gpt_4_0125_preview,3.961,refinement,biggen_240612,[],other | |
| 505,gpt_4_turbo_2024_04_09,4.0,refinement,biggen_240612,[],other | |
| 506,gpt_4o_2024_05_13,3.855,refinement,biggen_240612,[],other | |
| 507,mistral_medium_hjpark,3.632,refinement,biggen_240612,[],other | |
| 508,mistral_large_hjpark,3.684,refinement,biggen_240612,[],other | |
| 509,gemini_1.0_pro,2.816,refinement,biggen_240612,[],other | |
| 510,gemini_pro_1.5,3.553,refinement,biggen_240612,[],other | |
| 511,gemini_flash_1.5,3.513,refinement,biggen_240612,[],other | |
| 512,claude_3_haiku_20240307,3.566,refinement,biggen_240612,[],other | |
| 513,claude_3_sonnet_20240229,3.658,refinement,biggen_240612,[],other | |
| 514,claude_3_opus_20240229,3.947,refinement,biggen_240612,[],other | |
| 515,phi_1,1.391,safety,biggen_240612,[],other | |
| 516,phi_1_5,2.87,safety,biggen_240612,[],other | |
| 517,phi_2,3.406,safety,biggen_240612,[],other | |
| 518,qwen1.5_0.5b,2.029,safety,biggen_240612,[],other | |
| 519,qwen1.5_1.8b,2.42,safety,biggen_240612,[],other | |
| 520,qwen1.5_4b,3.13,safety,biggen_240612,[],other | |
| 521,gemma_2b,2.42,safety,biggen_240612,[],other | |
| 522,olmo_1b,2.072,safety,biggen_240612,[],other | |
| 523,qwen1.5_0.5b_chat,2.594,safety,biggen_240612,[],other | |
| 524,qwen1.5_1.8b_chat,2.696,safety,biggen_240612,[],other | |
| 525,qwen1.5_4b_chat,3.362,safety,biggen_240612,[],other | |
| 526,phi_3_mini_4k_instruct,4.101,safety,biggen_240612,[],other | |
| 527,phi_3_mini_128k_instruct,3.986,safety,biggen_240612,[],other | |
| 528,gemma_2b_it,3.928,safety,biggen_240612,[],other | |
| 529,gemma_1.1_2b_it,3.884,safety,biggen_240612,[],other | |
| 530,gemma_7b,2.029,safety,biggen_240612,[],other | |
| 531,mistral_7b_v0.1,3.29,safety,biggen_240612,[],other | |
| 532,mistral_7b_v0.2,3.304,safety,biggen_240612,[],other | |
| 533,qwen1.5_7b,3.087,safety,biggen_240612,[],other | |
| 534,yi_6b,3.101,safety,biggen_240612,[],other | |
| 535,llama_2_7b,3.188,safety,biggen_240612,[],other | |
| 536,codellama_7b,2.377,safety,biggen_240612,[],other | |
| 537,meta_llama_3_8b,2.899,safety,biggen_240612,[],other | |
| 538,llemma_7b,2.435,safety,biggen_240612,[],other | |
| 539,olmo_7b,2.623,safety,biggen_240612,[],other | |
| 540,gemma_7b_it,3.768,safety,biggen_240612,[],other | |
| 541,gemma_1.1_7b_it,4.043,safety,biggen_240612,[],other | |
| 542,mistral_7b_instruct_v0.2,3.986,safety,biggen_240612,[],other | |
| 543,qwen1.5_7b_chat,3.928,safety,biggen_240612,[],other | |
| 544,yi_6b_chat,3.609,safety,biggen_240612,[],other | |
| 545,llama_2_7b_chat,4.261,safety,biggen_240612,[],other | |
| 546,codellama_7b_instruct,3.841,safety,biggen_240612,[],other | |
| 547,meta_llama_3_8b_instruct,3.652,safety,biggen_240612,[],other | |
| 548,olmo_7b_sft,3.435,safety,biggen_240612,[],other | |
| 549,olmo_7b_instruct,3.188,safety,biggen_240612,[],other | |
| 550,tulu_2_7b,3.797,safety,biggen_240612,[],other | |
| 551,tulu_2_dpo_7b,3.797,safety,biggen_240612,[],other | |
| 552,codetulu_2_7b,3.348,safety,biggen_240612,[],other | |
| 553,orca_2_7b,2.58,safety,biggen_240612,[],other | |
| 554,openchat_3.5_0106,3.971,safety,biggen_240612,[],other | |
| 555,openhermes_2_mistral_7b,3.203,safety,biggen_240612,[],other | |
| 556,openhermes_2.5_mistral_7b,3.101,safety,biggen_240612,[],other | |
| 557,nous_hermes_2_mistral_7b_dpo,3.333,safety,biggen_240612,[],other | |
| 558,starling_lm_7b_alpha,3.797,safety,biggen_240612,[],other | |
| 559,starling_lm_7b_beta,3.841,safety,biggen_240612,[],other | |
| 560,mistral_orpo_alpha,3.826,safety,biggen_240612,[],other | |
| 561,mistral_orpo_beta,3.609,safety,biggen_240612,[],other | |
| 562,zephyr_7b_beta,3.725,safety,biggen_240612,[],other | |
| 563,qwen1.5_14b,2.536,safety,biggen_240612,[],other | |
| 564,llama_2_13b,3.319,safety,biggen_240612,[],other | |
| 565,codellama_13b,2.304,safety,biggen_240612,[],other | |
| 566,solar_10.7b_v1.0,3.652,safety,biggen_240612,[],other | |
| 567,qwen1.5_14b_chat,4.058,safety,biggen_240612,[],other | |
| 568,solar_10.7b_instruct_v1.0,3.826,safety,biggen_240612,[],other | |
| 569,aya_101,1.58,safety,biggen_240612,[],other | |
| 570,llama_2_13b_chat,4.29,safety,biggen_240612,[],other | |
| 571,codellama_13b_instruct,4.116,safety,biggen_240612,[],other | |
| 572,tulu_2_13b,3.87,safety,biggen_240612,[],other | |
| 573,tulu_2_dpo_13b,3.928,safety,biggen_240612,[],other | |
| 574,codetulu_2_13b,3.42,safety,biggen_240612,[],other | |
| 575,orca_2_13b,2.913,safety,biggen_240612,[],other | |
| 576,yi_34b,3.464,safety,biggen_240612,[],other | |
| 577,llemma_34b,2.884,safety,biggen_240612,[],other | |
| 578,qwen1.5_32b,3.377,safety,biggen_240612,[],other | |
| 579,codellama_34b,2.536,safety,biggen_240612,[],other | |
| 580,mixtral_8x7b_v0.1,3.855,safety,biggen_240612,[],other | |
| 581,yi_34b_chat,4.087,safety,biggen_240612,[],other | |
| 582,nous_hermes_2_yi_34b,3.507,safety,biggen_240612,[],other | |
| 583,codellama_34b_instruct,4.145,safety,biggen_240612,[],other | |
| 584,codetulu_2_34b,3.739,safety,biggen_240612,[],other | |
| 585,qwen1.5_32b_chat,4.116,safety,biggen_240612,[],other | |
| 586,mixtral_8x7b_instruct_v0.1,3.884,safety,biggen_240612,[],other | |
| 587,nous_hermes_2_mixtral_8x7b_sft,3.551,safety,biggen_240612,[],other | |
| 588,nous_hermes_2_mixtral_8x7b_dpo,3.667,safety,biggen_240612,[],other | |
| 589,c4ai_command_r_v01,3.913,safety,biggen_240612,[],other | |
| 590,llama_2_70b,3.913,safety,biggen_240612,[],other | |
| 591,codellama_70b,2.754,safety,biggen_240612,[],other | |
| 592,mixtral_8x22b_v0.1_awq,3.754,safety,biggen_240612,[],other | |
| 593,meta_llama_3_70b,3.058,safety,biggen_240612,[],other | |
| 594,qwen1.5_72b,3.957,safety,biggen_240612,[],other | |
| 595,llama_2_70b_chat,4.536,safety,biggen_240612,[],other | |
| 596,codellama_70b_instruct,4.043,safety,biggen_240612,[],other | |
| 597,tulu_2_dpo_70b,3.913,safety,biggen_240612,[],other | |
| 598,c4ai_command_r_plus_gptq,3.986,safety,biggen_240612,[],other | |
| 599,meta_llama_3_70b_instruct,3.87,safety,biggen_240612,[],other | |
| 600,mixtral_8x22b_instruct_v0.1_awq,3.899,safety,biggen_240612,[],other | |
| 601,zephyr_orpo_141b_a35b_v0.1_awq,3.435,safety,biggen_240612,[],other | |
| 602,qwen1.5_72b_chat,4.0,safety,biggen_240612,[],other | |
| 603,qwen_110b_chat,3.971,safety,biggen_240612,[],other | |
| 604,gpt_3.5_turbo_1106,4.13,safety,biggen_240612,[],other | |
| 605,gpt_3.5_turbo_0125,3.957,safety,biggen_240612,[],other | |
| 606,gpt_4_1106_preview,4.594,safety,biggen_240612,[],other | |
| 607,gpt_4_0125_preview,4.203,safety,biggen_240612,[],other | |
| 608,gpt_4_turbo_2024_04_09,4.116,safety,biggen_240612,[],other | |
| 609,gpt_4o_2024_05_13,4.043,safety,biggen_240612,[],other | |
| 610,mistral_medium_hjpark,4.13,safety,biggen_240612,[],other | |
| 611,mistral_large_hjpark,4.087,safety,biggen_240612,[],other | |
| 612,gemini_1.0_pro,4.043,safety,biggen_240612,[],other | |
| 613,gemini_pro_1.5,3.971,safety,biggen_240612,[],other | |
| 614,gemini_flash_1.5,4.203,safety,biggen_240612,[],other | |
| 615,claude_3_haiku_20240307,4.29,safety,biggen_240612,[],other | |
| 616,claude_3_sonnet_20240229,4.362,safety,biggen_240612,[],other | |
| 617,claude_3_opus_20240229,4.551,safety,biggen_240612,[],other | |
| 618,phi_1,1.01,theory_of_mind,biggen_240612,[],reasoning | |
| 619,phi_1_5,2.7,theory_of_mind,biggen_240612,[],reasoning | |
| 620,phi_2,3.0,theory_of_mind,biggen_240612,[],reasoning | |
| 621,qwen1.5_0.5b,1.75,theory_of_mind,biggen_240612,[],reasoning | |
| 622,qwen1.5_1.8b,2.36,theory_of_mind,biggen_240612,[],reasoning | |
| 623,qwen1.5_4b,2.61,theory_of_mind,biggen_240612,[],reasoning | |
| 624,gemma_2b,2.24,theory_of_mind,biggen_240612,[],reasoning | |
| 625,olmo_1b,1.44,theory_of_mind,biggen_240612,[],reasoning | |
| 626,qwen1.5_0.5b_chat,2.26,theory_of_mind,biggen_240612,[],reasoning | |
| 627,qwen1.5_1.8b_chat,3.03,theory_of_mind,biggen_240612,[],reasoning | |
| 628,qwen1.5_4b_chat,2.89,theory_of_mind,biggen_240612,[],reasoning | |
| 629,phi_3_mini_4k_instruct,3.78,theory_of_mind,biggen_240612,[],reasoning | |
| 630,phi_3_mini_128k_instruct,3.66,theory_of_mind,biggen_240612,[],reasoning | |
| 631,gemma_2b_it,3.16,theory_of_mind,biggen_240612,[],reasoning | |
| 632,gemma_1.1_2b_it,3.15,theory_of_mind,biggen_240612,[],reasoning | |
| 633,gemma_7b,1.17,theory_of_mind,biggen_240612,[],reasoning | |
| 634,mistral_7b_v0.1,2.97,theory_of_mind,biggen_240612,[],reasoning | |
| 635,mistral_7b_v0.2,3.1,theory_of_mind,biggen_240612,[],reasoning | |
| 636,qwen1.5_7b,2.68,theory_of_mind,biggen_240612,[],reasoning | |
| 637,yi_6b,2.74,theory_of_mind,biggen_240612,[],reasoning | |
| 638,llama_2_7b,2.37,theory_of_mind,biggen_240612,[],reasoning | |
| 639,codellama_7b,1.77,theory_of_mind,biggen_240612,[],reasoning | |
| 640,meta_llama_3_8b,2.57,theory_of_mind,biggen_240612,[],reasoning | |
| 641,llemma_7b,2.02,theory_of_mind,biggen_240612,[],reasoning | |
| 642,olmo_7b,1.97,theory_of_mind,biggen_240612,[],reasoning | |
| 643,gemma_7b_it,3.19,theory_of_mind,biggen_240612,[],reasoning | |
| 644,gemma_1.1_7b_it,3.354,theory_of_mind,biggen_240612,[],reasoning | |
| 645,mistral_7b_instruct_v0.2,3.68,theory_of_mind,biggen_240612,[],reasoning | |
| 646,qwen1.5_7b_chat,3.67,theory_of_mind,biggen_240612,[],reasoning | |
| 647,yi_6b_chat,3.545,theory_of_mind,biggen_240612,[],reasoning | |
| 648,llama_2_7b_chat,3.6,theory_of_mind,biggen_240612,[],reasoning | |
| 649,codellama_7b_instruct,3.23,theory_of_mind,biggen_240612,[],reasoning | |
| 650,meta_llama_3_8b_instruct,3.65,theory_of_mind,biggen_240612,[],reasoning | |
| 651,olmo_7b_sft,2.85,theory_of_mind,biggen_240612,[],reasoning | |
| 652,olmo_7b_instruct,3.29,theory_of_mind,biggen_240612,[],reasoning | |
| 653,tulu_2_7b,3.17,theory_of_mind,biggen_240612,[],reasoning | |
| 654,tulu_2_dpo_7b,3.59,theory_of_mind,biggen_240612,[],reasoning | |
| 655,codetulu_2_7b,3.09,theory_of_mind,biggen_240612,[],reasoning | |
| 656,orca_2_7b,2.23,theory_of_mind,biggen_240612,[],reasoning | |
| 657,openchat_3.5_0106,3.56,theory_of_mind,biggen_240612,[],reasoning | |
| 658,openhermes_2_mistral_7b,3.3,theory_of_mind,biggen_240612,[],reasoning | |
| 659,openhermes_2.5_mistral_7b,3.35,theory_of_mind,biggen_240612,[],reasoning | |
| 660,nous_hermes_2_mistral_7b_dpo,3.51,theory_of_mind,biggen_240612,[],reasoning | |
| 661,starling_lm_7b_alpha,3.47,theory_of_mind,biggen_240612,[],reasoning | |
| 662,starling_lm_7b_beta,3.68,theory_of_mind,biggen_240612,[],reasoning | |
| 663,mistral_orpo_alpha,3.47,theory_of_mind,biggen_240612,[],reasoning | |
| 664,mistral_orpo_beta,3.47,theory_of_mind,biggen_240612,[],reasoning | |
| 665,zephyr_7b_beta,3.64,theory_of_mind,biggen_240612,[],reasoning | |
| 666,qwen1.5_14b,3.01,theory_of_mind,biggen_240612,[],reasoning | |
| 667,llama_2_13b,2.61,theory_of_mind,biggen_240612,[],reasoning | |
| 668,codellama_13b,1.98,theory_of_mind,biggen_240612,[],reasoning | |
| 669,solar_10.7b_v1.0,3.21,theory_of_mind,biggen_240612,[],reasoning | |
| 670,qwen1.5_14b_chat,3.51,theory_of_mind,biggen_240612,[],reasoning | |
| 671,solar_10.7b_instruct_v1.0,3.66,theory_of_mind,biggen_240612,[],reasoning | |
| 672,aya_101,1.37,theory_of_mind,biggen_240612,[],reasoning | |
| 673,llama_2_13b_chat,3.65,theory_of_mind,biggen_240612,[],reasoning | |
| 674,codellama_13b_instruct,3.25,theory_of_mind,biggen_240612,[],reasoning | |
| 675,tulu_2_13b,3.23,theory_of_mind,biggen_240612,[],reasoning | |
| 676,tulu_2_dpo_13b,3.61,theory_of_mind,biggen_240612,[],reasoning | |
| 677,codetulu_2_13b,3.31,theory_of_mind,biggen_240612,[],reasoning | |
| 678,orca_2_13b,2.77,theory_of_mind,biggen_240612,[],reasoning | |
| 679,yi_34b,3.26,theory_of_mind,biggen_240612,[],reasoning | |
| 680,llemma_34b,2.51,theory_of_mind,biggen_240612,[],reasoning | |
| 681,qwen1.5_32b,3.24,theory_of_mind,biggen_240612,[],reasoning | |
| 682,codellama_34b,2.56,theory_of_mind,biggen_240612,[],reasoning | |
| 683,mixtral_8x7b_v0.1,3.35,theory_of_mind,biggen_240612,[],reasoning | |
| 684,yi_34b_chat,3.84,theory_of_mind,biggen_240612,[],reasoning | |
| 685,nous_hermes_2_yi_34b,3.43,theory_of_mind,biggen_240612,[],reasoning | |
| 686,codellama_34b_instruct,3.34,theory_of_mind,biggen_240612,[],reasoning | |
| 687,codetulu_2_34b,3.45,theory_of_mind,biggen_240612,[],reasoning | |
| 688,qwen1.5_32b_chat,3.78,theory_of_mind,biggen_240612,[],reasoning | |
| 689,mixtral_8x7b_instruct_v0.1,3.8,theory_of_mind,biggen_240612,[],reasoning | |
| 690,nous_hermes_2_mixtral_8x7b_sft,3.47,theory_of_mind,biggen_240612,[],reasoning | |
| 691,nous_hermes_2_mixtral_8x7b_dpo,3.63,theory_of_mind,biggen_240612,[],reasoning | |
| 692,c4ai_command_r_v01,3.74,theory_of_mind,biggen_240612,[],reasoning | |
| 693,llama_2_70b,3.25,theory_of_mind,biggen_240612,[],reasoning | |
| 694,codellama_70b,2.3,theory_of_mind,biggen_240612,[],reasoning | |
| 695,mixtral_8x22b_v0.1_awq,3.39,theory_of_mind,biggen_240612,[],reasoning | |
| 696,meta_llama_3_70b,2.9,theory_of_mind,biggen_240612,[],reasoning | |
| 697,qwen1.5_72b,3.17,theory_of_mind,biggen_240612,[],reasoning | |
| 698,llama_2_70b_chat,3.75,theory_of_mind,biggen_240612,[],reasoning | |
| 699,codellama_70b_instruct,2.44,theory_of_mind,biggen_240612,[],reasoning | |
| 700,tulu_2_dpo_70b,3.79,theory_of_mind,biggen_240612,[],reasoning | |
| 701,c4ai_command_r_plus_gptq,3.87,theory_of_mind,biggen_240612,[],reasoning | |
| 702,meta_llama_3_70b_instruct,3.92,theory_of_mind,biggen_240612,[],reasoning | |
| 703,mixtral_8x22b_instruct_v0.1_awq,3.74,theory_of_mind,biggen_240612,[],reasoning | |
| 704,zephyr_orpo_141b_a35b_v0.1_awq,3.48,theory_of_mind,biggen_240612,[],reasoning | |
| 705,qwen1.5_72b_chat,3.92,theory_of_mind,biggen_240612,[],reasoning | |
| 706,qwen_110b_chat,3.94,theory_of_mind,biggen_240612,[],reasoning | |
| 707,gpt_3.5_turbo_1106,3.74,theory_of_mind,biggen_240612,[],reasoning | |
| 708,gpt_3.5_turbo_0125,3.8,theory_of_mind,biggen_240612,[],reasoning | |
| 709,gpt_4_1106_preview,4.07,theory_of_mind,biggen_240612,[],reasoning | |
| 710,gpt_4_0125_preview,4.21,theory_of_mind,biggen_240612,[],reasoning | |
| 711,gpt_4_turbo_2024_04_09,4.03,theory_of_mind,biggen_240612,[],reasoning | |
| 712,gpt_4o_2024_05_13,4.04,theory_of_mind,biggen_240612,[],reasoning | |
| 713,mistral_medium_hjpark,3.85,theory_of_mind,biggen_240612,[],reasoning | |
| 714,mistral_large_hjpark,3.93,theory_of_mind,biggen_240612,[],reasoning | |
| 715,gemini_1.0_pro,3.83,theory_of_mind,biggen_240612,[],reasoning | |
| 716,gemini_pro_1.5,3.96,theory_of_mind,biggen_240612,[],reasoning | |
| 717,gemini_flash_1.5,3.89,theory_of_mind,biggen_240612,[],reasoning | |
| 718,claude_3_haiku_20240307,3.97,theory_of_mind,biggen_240612,[],reasoning | |
| 719,claude_3_sonnet_20240229,3.97,theory_of_mind,biggen_240612,[],reasoning | |
| 720,claude_3_opus_20240229,4.08,theory_of_mind,biggen_240612,[],reasoning | |
| 721,phi_1,1.012,tool_usage,biggen_240612,[],other | |
| 722,phi_1_5,1.3,tool_usage,biggen_240612,[],other | |
| 723,phi_2,1.675,tool_usage,biggen_240612,[],other | |
| 724,qwen1.5_0.5b,1.15,tool_usage,biggen_240612,[],other | |
| 725,qwen1.5_1.8b,1.413,tool_usage,biggen_240612,[],other | |
| 726,qwen1.5_4b,1.688,tool_usage,biggen_240612,[],other | |
| 727,gemma_2b,1.35,tool_usage,biggen_240612,[],other | |
| 728,olmo_1b,1.087,tool_usage,biggen_240612,[],other | |
| 729,qwen1.5_0.5b_chat,1.25,tool_usage,biggen_240612,[],other | |
| 730,qwen1.5_1.8b_chat,1.688,tool_usage,biggen_240612,[],other | |
| 731,qwen1.5_4b_chat,2.05,tool_usage,biggen_240612,[],other | |
| 732,phi_3_mini_4k_instruct,3.112,tool_usage,biggen_240612,[],other | |
| 733,phi_3_mini_128k_instruct,2.7,tool_usage,biggen_240612,[],other | |
| 734,gemma_2b_it,1.812,tool_usage,biggen_240612,[],other | |
| 735,gemma_1.1_2b_it,1.675,tool_usage,biggen_240612,[],other | |
| 736,gemma_7b,1.025,tool_usage,biggen_240612,[],other | |
| 737,mistral_7b_v0.1,2.038,tool_usage,biggen_240612,[],other | |
| 738,mistral_7b_v0.2,1.962,tool_usage,biggen_240612,[],other | |
| 739,qwen1.5_7b,2.212,tool_usage,biggen_240612,[],other | |
| 740,yi_6b,1.425,tool_usage,biggen_240612,[],other | |
| 741,llama_2_7b,1.337,tool_usage,biggen_240612,[],other | |
| 742,codellama_7b,1.387,tool_usage,biggen_240612,[],other | |
| 743,meta_llama_3_8b,1.738,tool_usage,biggen_240612,[],other | |
| 744,llemma_7b,1.575,tool_usage,biggen_240612,[],other | |
| 745,olmo_7b,1.15,tool_usage,biggen_240612,[],other | |
| 746,gemma_7b_it,2.125,tool_usage,biggen_240612,[],other | |
| 747,gemma_1.1_7b_it,2.562,tool_usage,biggen_240612,[],other | |
| 748,mistral_7b_instruct_v0.2,3.175,tool_usage,biggen_240612,[],other | |
| 749,qwen1.5_7b_chat,3.013,tool_usage,biggen_240612,[],other | |
| 750,yi_6b_chat,2.05,tool_usage,biggen_240612,[],other | |
| 751,llama_2_7b_chat,2.075,tool_usage,biggen_240612,[],other | |
| 752,codellama_7b_instruct,2.288,tool_usage,biggen_240612,[],other | |
| 753,meta_llama_3_8b_instruct,3.263,tool_usage,biggen_240612,[],other | |
| 754,olmo_7b_sft,1.887,tool_usage,biggen_240612,[],other | |
| 755,olmo_7b_instruct,1.875,tool_usage,biggen_240612,[],other | |
| 756,tulu_2_7b,2.062,tool_usage,biggen_240612,[],other | |
| 757,tulu_2_dpo_7b,2.325,tool_usage,biggen_240612,[],other | |
| 758,codetulu_2_7b,2.65,tool_usage,biggen_240612,[],other | |
| 759,orca_2_7b,1.462,tool_usage,biggen_240612,[],other | |
| 760,openchat_3.5_0106,2.9,tool_usage,biggen_240612,[],other | |
| 761,openhermes_2_mistral_7b,2.663,tool_usage,biggen_240612,[],other | |
| 762,openhermes_2.5_mistral_7b,2.65,tool_usage,biggen_240612,[],other | |
| 763,nous_hermes_2_mistral_7b_dpo,2.837,tool_usage,biggen_240612,[],other | |
| 764,starling_lm_7b_alpha,2.95,tool_usage,biggen_240612,[],other | |
| 765,starling_lm_7b_beta,3.388,tool_usage,biggen_240612,[],other | |
| 766,mistral_orpo_alpha,2.675,tool_usage,biggen_240612,[],other | |
| 767,mistral_orpo_beta,2.775,tool_usage,biggen_240612,[],other | |
| 768,zephyr_7b_beta,3.175,tool_usage,biggen_240612,[],other | |
| 769,qwen1.5_14b,2.788,tool_usage,biggen_240612,[],other | |
| 770,llama_2_13b,1.575,tool_usage,biggen_240612,[],other | |
| 771,codellama_13b,1.525,tool_usage,biggen_240612,[],other | |
| 772,solar_10.7b_v1.0,2.312,tool_usage,biggen_240612,[],other | |
| 773,qwen1.5_14b_chat,3.075,tool_usage,biggen_240612,[],other | |
| 774,solar_10.7b_instruct_v1.0,3.188,tool_usage,biggen_240612,[],other | |
| 775,aya_101,1.163,tool_usage,biggen_240612,[],other | |
| 776,llama_2_13b_chat,2.3,tool_usage,biggen_240612,[],other | |
| 777,codellama_13b_instruct,2.388,tool_usage,biggen_240612,[],other | |
| 778,tulu_2_13b,2.5,tool_usage,biggen_240612,[],other | |
| 779,tulu_2_dpo_13b,2.763,tool_usage,biggen_240612,[],other | |
| 780,codetulu_2_13b,3.013,tool_usage,biggen_240612,[],other | |
| 781,orca_2_13b,2.013,tool_usage,biggen_240612,[],other | |
| 782,yi_34b,2.3,tool_usage,biggen_240612,[],other | |
| 783,llemma_34b,1.887,tool_usage,biggen_240612,[],other | |
| 784,qwen1.5_32b,2.712,tool_usage,biggen_240612,[],other | |
| 785,codellama_34b,1.875,tool_usage,biggen_240612,[],other | |
| 786,mixtral_8x7b_v0.1,2.538,tool_usage,biggen_240612,[],other | |
| 787,yi_34b_chat,3.075,tool_usage,biggen_240612,[],other | |
| 788,nous_hermes_2_yi_34b,3.013,tool_usage,biggen_240612,[],other | |
| 789,codellama_34b_instruct,2.487,tool_usage,biggen_240612,[],other | |
| 790,codetulu_2_34b,3.2,tool_usage,biggen_240612,[],other | |
| 791,qwen1.5_32b_chat,3.55,tool_usage,biggen_240612,[],other | |
| 792,mixtral_8x7b_instruct_v0.1,3.237,tool_usage,biggen_240612,[],other | |
| 793,nous_hermes_2_mixtral_8x7b_sft,3.288,tool_usage,biggen_240612,[],other | |
| 794,nous_hermes_2_mixtral_8x7b_dpo,3.413,tool_usage,biggen_240612,[],other | |
| 795,c4ai_command_r_v01,2.987,tool_usage,biggen_240612,[],other | |
| 796,llama_2_70b,2.487,tool_usage,biggen_240612,[],other | |
| 797,codellama_70b,2.138,tool_usage,biggen_240612,[],other | |
| 798,mixtral_8x22b_v0.1_awq,2.875,tool_usage,biggen_240612,[],other | |
| 799,meta_llama_3_70b,2.388,tool_usage,biggen_240612,[],other | |
| 800,qwen1.5_72b,2.875,tool_usage,biggen_240612,[],other | |
| 801,llama_2_70b_chat,2.875,tool_usage,biggen_240612,[],other | |
| 802,codellama_70b_instruct,1.712,tool_usage,biggen_240612,[],other | |
| 803,tulu_2_dpo_70b,3.5,tool_usage,biggen_240612,[],other | |
| 804,c4ai_command_r_plus_gptq,3.475,tool_usage,biggen_240612,[],other | |
| 805,meta_llama_3_70b_instruct,3.625,tool_usage,biggen_240612,[],other | |
| 806,mixtral_8x22b_instruct_v0.1_awq,3.462,tool_usage,biggen_240612,[],other | |
| 807,zephyr_orpo_141b_a35b_v0.1_awq,3.062,tool_usage,biggen_240612,[],other | |
| 808,qwen1.5_72b_chat,3.388,tool_usage,biggen_240612,[],other | |
| 809,qwen_110b_chat,3.438,tool_usage,biggen_240612,[],other | |
| 810,gpt_3.5_turbo_1106,3.062,tool_usage,biggen_240612,[],other | |
| 811,gpt_3.5_turbo_0125,2.987,tool_usage,biggen_240612,[],other | |
| 812,gpt_4_1106_preview,3.7,tool_usage,biggen_240612,[],other | |
| 813,gpt_4_0125_preview,3.675,tool_usage,biggen_240612,[],other | |
| 814,gpt_4_turbo_2024_04_09,3.712,tool_usage,biggen_240612,[],other | |
| 815,gpt_4o_2024_05_13,3.775,tool_usage,biggen_240612,[],other | |
| 816,mistral_medium_hjpark,3.737,tool_usage,biggen_240612,[],other | |
| 817,mistral_large_hjpark,3.638,tool_usage,biggen_240612,[],other | |
| 818,gemini_1.0_pro,3.138,tool_usage,biggen_240612,[],other | |
| 819,gemini_pro_1.5,3.337,tool_usage,biggen_240612,[],other | |
| 820,gemini_flash_1.5,3.337,tool_usage,biggen_240612,[],other | |
| 821,claude_3_haiku_20240307,3.775,tool_usage,biggen_240612,[],other | |
| 822,claude_3_sonnet_20240229,3.663,tool_usage,biggen_240612,[],other | |
| 823,claude_3_opus_20240229,3.775,tool_usage,biggen_240612,[],other | |
| 0,aya_101,0.029411764705882353,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 1,c4ai_command_r_plus_gptq,0.8382352941176471,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 2,c4ai_command_r_v01,0.6948529411764706,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 3,claude_3_haiku_20240307,0.9252450980392157,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 4,claude_3_opus_20240229,0.9681372549019608,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 5,claude_3_sonnet_20240229,0.9240196078431373,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 6,codellama_13b,0.07598039215686275,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 7,codellama_13b_instruct,0.4276960784313726,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 8,codellama_34b,0.1482843137254902,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 9,codellama_34b_instruct,0.5098039215686274,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 10,codellama_70b,0.18872549019607843,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 11,codellama_70b_instruct,0.27450980392156865,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 12,codellama_7b,0.05514705882352941,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 13,codellama_7b_instruct,0.36519607843137253,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 14,codetulu_2_13b,0.43137254901960786,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 15,codetulu_2_34b,0.5441176470588235,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 16,codetulu_2_7b,0.32598039215686275,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 17,gemini_1.0_pro,0.7107843137254902,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 18,gemini_flash_1.5,0.866421568627451,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 19,gemini_pro_1.5,0.8676470588235294,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 20,gemma_1.1_2b_it,0.33578431372549017,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 21,gemma_1.1_7b_it,0.5551470588235294,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 22,gemma_2b,0.09803921568627451,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 23,gemma_2b_it,0.3333333333333333,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 24,gemma_7b,0.013480392156862746,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 25,gemma_7b_it,0.40931372549019607,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 26,gpt_3.5_turbo_0125,0.7757352941176471,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 27,gpt_3.5_turbo_1106,0.758578431372549,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 28,gpt_4_0125_preview,0.9779411764705882,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 29,gpt_4_1106_preview,0.9889705882352942,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 30,gpt_4_turbo_2024_04_09,0.9558823529411765,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 31,gpt_4o_2024_05_13,0.9436274509803921,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 32,llama_2_13b,0.20220588235294118,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 33,llama_2_13b_chat,0.5968137254901961,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 34,llama_2_70b,0.4656862745098039,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 35,llama_2_70b_chat,0.7205882352941176,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 36,llama_2_7b,0.1446078431372549,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 37,llama_2_7b_chat,0.5355392156862745,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 38,llemma_34b,0.21200980392156862,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 39,llemma_7b,0.11029411764705882,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 40,meta_llama_3_70b,0.36887254901960786,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 41,meta_llama_3_70b_instruct,0.875,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 42,meta_llama_3_8b,0.2377450980392157,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 43,meta_llama_3_8b_instruct,0.7328431372549019,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 44,mistral_7b_instruct_v0.2,0.7156862745098039,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 45,mistral_7b_v0.1,0.3272058823529412,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 46,mistral_7b_v0.2,0.3137254901960784,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 47,mistral_large_hjpark,0.8762254901960784,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 48,mistral_medium_hjpark,0.8970588235294118,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 49,mistral_orpo_alpha,0.5392156862745098,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 50,mistral_orpo_beta,0.5477941176470589,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 51,mixtral_8x22b_instruct_v0.1_awq,0.8198529411764706,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 52,mixtral_8x22b_v0.1_awq,0.5968137254901961,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 53,mixtral_8x7b_instruct_v0.1,0.7647058823529411,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 54,mixtral_8x7b_v0.1,0.5453431372549019,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 55,nous_hermes_2_mistral_7b_dpo,0.571078431372549,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 56,nous_hermes_2_mixtral_8x7b_dpo,0.7095588235294118,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 57,nous_hermes_2_mixtral_8x7b_sft,0.6262254901960784,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 58,nous_hermes_2_yi_34b,0.5906862745098039,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 59,olmo_1b,0.028186274509803922,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 60,olmo_7b,0.07107843137254902,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 61,olmo_7b_instruct,0.30269607843137253,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 62,olmo_7b_sft,0.2549019607843137,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 63,openchat_3.5_0106,0.6825980392156863,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 64,openhermes_2.5_mistral_7b,0.4583333333333333,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 65,openhermes_2_mistral_7b,0.5122549019607843,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 66,orca_2_13b,0.17401960784313725,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 67,orca_2_7b,0.08700980392156862,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 68,phi_1,0.0,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 69,phi_1_5,0.15318627450980393,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 70,phi_2,0.29044117647058826,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 71,phi_3_mini_128k_instruct,0.6911764705882353,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 72,phi_3_mini_4k_instruct,0.7867647058823529,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 73,qwen1.5_0.5b,0.0428921568627451,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 74,qwen1.5_0.5b_chat,0.07965686274509803,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 75,qwen1.5_1.8b,0.12867647058823528,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 76,qwen1.5_1.8b_chat,0.21691176470588236,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 77,qwen1.5_14b,0.3946078431372549,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 78,qwen1.5_14b_chat,0.7267156862745098,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 79,qwen1.5_32b,0.4791666666666667,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 80,qwen1.5_32b_chat,0.8149509803921569,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 81,qwen1.5_4b,0.21323529411764705,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 82,qwen1.5_4b_chat,0.29411764705882354,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 83,qwen1.5_72b,0.5294117647058824,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 84,qwen1.5_72b_chat,0.8713235294117647,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 85,qwen1.5_7b,0.2610294117647059,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 86,qwen1.5_7b_chat,0.6580882352941176,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 87,qwen_110b_chat,0.8848039215686274,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 88,solar_10.7b_instruct_v1.0,0.6862745098039216,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 89,solar_10.7b_v1.0,0.43995098039215685,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 90,starling_lm_7b_alpha,0.6139705882352942,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 91,starling_lm_7b_beta,0.7573529411764706,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 92,tulu_2_13b,0.4313725490196078,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 93,tulu_2_7b,0.3553921568627451,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 94,tulu_2_dpo_13b,0.5833333333333333,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 95,tulu_2_dpo_70b,0.7708333333333334,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 96,tulu_2_dpo_7b,0.4767156862745098,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 97,yi_34b,0.46078431372549017,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 98,yi_34b_chat,0.7720588235294118,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 99,yi_6b,0.17892156862745098,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 100,yi_6b_chat,0.4117647058823529,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 101,zephyr_7b_beta,0.6200980392156863,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 102,zephyr_orpo_141b_a35b_v0.1_awq,0.6311274509803921,biggen_mwr,biggen_240612,"['grounding', 'instruction_following', 'planning', 'reasoning', 'refinement', 'safety', 'theory_of_mind', 'tool_usage']", | |
| 0,gpt_4o_0513,35.7,wildbench_mix,wildbench_240612,[],holistic | |
| 1,gpt_4_turbo_0409,34.6,wildbench_mix,wildbench_240612,[],holistic | |
| 2,gpt_4_turbo_0125,29.9,wildbench_mix,wildbench_240612,[],holistic | |
| 3,gemini_1.5_pro,27.8,wildbench_mix,wildbench_240612,[],holistic | |
| 4,llama_3_70b_inst,21.0,wildbench_mix,wildbench_240612,[],holistic | |
| 5,claude_3_opus,20.1,wildbench_mix,wildbench_240612,[],holistic | |
| 6,gemini_1.5_flash,17.4,wildbench_mix,wildbench_240612,[],holistic | |
| 7,yi_1.5_34b_chat,16.8,wildbench_mix,wildbench_240612,[],holistic | |
| 8,llama3_inst_8b_simpo,14.0,wildbench_mix,wildbench_240612,[],holistic | |
| 9,claude_3_sonnet,7.2,wildbench_mix,wildbench_240612,[],holistic | |
| 10,qwen1.5_72b_chat,4.4,wildbench_mix,wildbench_240612,[],holistic | |
| 11,command_r_plus,0.4,wildbench_mix,wildbench_240612,[],holistic | |
| 12,claude_3_haiku,-8.5,wildbench_mix,wildbench_240612,[],holistic | |
| 13,mistral_large,-10.5,wildbench_mix,wildbench_240612,[],holistic | |
| 14,starlinglm_7b_beta,-11.9,wildbench_mix,wildbench_240612,[],holistic | |
| 15,llama_3_8b_inst,-14.6,wildbench_mix,wildbench_240612,[],holistic | |
| 16,command_r,-16.0,wildbench_mix,wildbench_240612,[],holistic | |
| 17,mixtral_8x7b_inst,-18.8,wildbench_mix,wildbench_240612,[],holistic | |
| 18,dbrx_instruct,-21.6,wildbench_mix,wildbench_240612,[],holistic | |
| 19,yi_1.5_6b_chat,-24.3,wildbench_mix,wildbench_240612,[],holistic | |
| 20,mistral_7b_inst_v0.2,-25.0,wildbench_mix,wildbench_240612,[],holistic | |
| 21,tulu_2_dpo_70b,-25.4,wildbench_mix,wildbench_240612,[],holistic | |
| 22,llama_2_70b_chat,-26.8,wildbench_mix,wildbench_240612,[],holistic | |
| 23,qwen1.5_7b_chat,-27.0,wildbench_mix,wildbench_240612,[],holistic | |
| 24,phi_3_medium_128k,-33.3,wildbench_mix,wildbench_240612,[],holistic | |
| 25,gpt_3.5_turbo_0125,-33.5,wildbench_mix,wildbench_240612,[],holistic | |
| 26,llama_2_7b_chat,-48.0,wildbench_mix,wildbench_240612,[],holistic | |
| 27,gemma_7b_it,-57.0,wildbench_mix,wildbench_240612,[],holistic | |
| 28,gemma_2b_it,-74.1,wildbench_mix,wildbench_240612,[],holistic | |
| 29,gpt_4o_0513,1.5,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 30,gpt_4_turbo_0409,0.0,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 31,gpt_4_turbo_0125,4.4,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 32,gemini_1.5_pro,-4.4,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 33,llama_3_70b_inst,-19.0,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 34,claude_3_opus,-20.4,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 35,gemini_1.5_flash,-16.6,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 36,yi_1.5_34b_chat,-18.3,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 37,llama3_inst_8b_simpo,-22.5,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 38,claude_3_sonnet,-31.6,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 39,qwen1.5_72b_chat,-34.8,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 40,command_r_plus,-36.3,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 41,claude_3_haiku,-46.9,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 42,mistral_large,-48.1,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 43,starlinglm_7b_beta,-48.7,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 44,llama_3_8b_inst,-49.8,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 45,command_r,-48.4,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 46,mixtral_8x7b_inst,-53.4,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 47,dbrx_instruct,-57.3,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 48,yi_1.5_6b_chat,-55.0,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 49,mistral_7b_inst_v0.2,-58.1,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 50,tulu_2_dpo_70b,-59.3,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 51,llama_2_70b_chat,-56.9,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 52,qwen1.5_7b_chat,-57.7,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 53,phi_3_medium_128k,-66.4,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 54,gpt_3.5_turbo_0125,-66.3,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 55,llama_2_7b_chat,-71.8,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 56,gemma_7b_it,-78.4,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 57,gemma_2b_it,-87.8,wildbench_gpt4t,wildbench_240612,[],holistic | |
| 58,gpt_4o_0513,46.3,wildbench_haiku,wildbench_240612,[],holistic | |
| 59,gpt_4_turbo_0409,45.3,wildbench_haiku,wildbench_240612,[],holistic | |
| 60,gpt_4_turbo_0125,38.8,wildbench_haiku,wildbench_240612,[],holistic | |
| 61,gemini_1.5_pro,37.9,wildbench_haiku,wildbench_240612,[],holistic | |
| 62,llama_3_70b_inst,31.9,wildbench_haiku,wildbench_240612,[],holistic | |
| 63,claude_3_opus,34.3,wildbench_haiku,wildbench_240612,[],holistic | |
| 64,gemini_1.5_flash,26.3,wildbench_haiku,wildbench_240612,[],holistic | |
| 65,yi_1.5_34b_chat,24.1,wildbench_haiku,wildbench_240612,[],holistic | |
| 66,llama3_inst_8b_simpo,18.9,wildbench_haiku,wildbench_240612,[],holistic | |
| 67,claude_3_sonnet,19.4,wildbench_haiku,wildbench_240612,[],holistic | |
| 68,qwen1.5_72b_chat,13.1,wildbench_haiku,wildbench_240612,[],holistic | |
| 69,command_r_plus,7.4,wildbench_haiku,wildbench_240612,[],holistic | |
| 70,claude_3_haiku,0.0,wildbench_haiku,wildbench_240612,[],holistic | |
| 71,mistral_large,-4.0,wildbench_haiku,wildbench_240612,[],holistic | |
| 72,starlinglm_7b_beta,-5.0,wildbench_haiku,wildbench_240612,[],holistic | |
| 73,llama_3_8b_inst,-9.7,wildbench_haiku,wildbench_240612,[],holistic | |
| 74,command_r,-12.7,wildbench_haiku,wildbench_240612,[],holistic | |
| 75,mixtral_8x7b_inst,-13.5,wildbench_haiku,wildbench_240612,[],holistic | |
| 76,dbrx_instruct,-16.3,wildbench_haiku,wildbench_240612,[],holistic | |
| 77,yi_1.5_6b_chat,-19.9,wildbench_haiku,wildbench_240612,[],holistic | |
| 78,mistral_7b_inst_v0.2,-22.4,wildbench_haiku,wildbench_240612,[],holistic | |
| 79,tulu_2_dpo_70b,-20.3,wildbench_haiku,wildbench_240612,[],holistic | |
| 80,llama_2_70b_chat,-23.6,wildbench_haiku,wildbench_240612,[],holistic | |
| 81,qwen1.5_7b_chat,-23.0,wildbench_haiku,wildbench_240612,[],holistic | |
| 82,phi_3_medium_128k,-30.0,wildbench_haiku,wildbench_240612,[],holistic | |
| 83,gpt_3.5_turbo_0125,-30.0,wildbench_haiku,wildbench_240612,[],holistic | |
| 84,llama_2_7b_chat,-44.6,wildbench_haiku,wildbench_240612,[],holistic | |
| 85,gemma_7b_it,-55.8,wildbench_haiku,wildbench_240612,[],holistic | |
| 86,gemma_2b_it,-73.6,wildbench_haiku,wildbench_240612,[],holistic | |
| 87,gpt_4o_0513,59.3,wildbench_llama2,wildbench_240612,[],holistic | |
| 88,gpt_4_turbo_0409,58.4,wildbench_llama2,wildbench_240612,[],holistic | |
| 89,gpt_4_turbo_0125,55.2,wildbench_llama2,wildbench_240612,[],holistic | |
| 90,gemini_1.5_pro,50.0,wildbench_llama2,wildbench_240612,[],holistic | |
| 91,llama_3_70b_inst,50.2,wildbench_llama2,wildbench_240612,[],holistic | |
| 92,claude_3_opus,46.3,wildbench_llama2,wildbench_240612,[],holistic | |
| 93,gemini_1.5_flash,42.5,wildbench_llama2,wildbench_240612,[],holistic | |
| 94,yi_1.5_34b_chat,44.5,wildbench_llama2,wildbench_240612,[],holistic | |
| 95,llama3_inst_8b_simpo,45.7,wildbench_llama2,wildbench_240612,[],holistic | |
| 96,claude_3_sonnet,33.9,wildbench_llama2,wildbench_240612,[],holistic | |
| 97,qwen1.5_72b_chat,34.7,wildbench_llama2,wildbench_240612,[],holistic | |
| 98,command_r_plus,30.2,wildbench_llama2,wildbench_240612,[],holistic | |
| 99,claude_3_haiku,21.4,wildbench_llama2,wildbench_240612,[],holistic | |
| 100,mistral_large,20.5,wildbench_llama2,wildbench_240612,[],holistic | |
| 101,starlinglm_7b_beta,18.0,wildbench_llama2,wildbench_240612,[],holistic | |
| 102,llama_3_8b_inst,15.7,wildbench_llama2,wildbench_240612,[],holistic | |
| 103,command_r,13.1,wildbench_llama2,wildbench_240612,[],holistic | |
| 104,mixtral_8x7b_inst,10.4,wildbench_llama2,wildbench_240612,[],holistic | |
| 105,dbrx_instruct,8.7,wildbench_llama2,wildbench_240612,[],holistic | |
| 106,yi_1.5_6b_chat,2.1,wildbench_llama2,wildbench_240612,[],holistic | |
| 107,mistral_7b_inst_v0.2,5.5,wildbench_llama2,wildbench_240612,[],holistic | |
| 108,tulu_2_dpo_70b,3.3,wildbench_llama2,wildbench_240612,[],holistic | |
| 109,llama_2_70b_chat,0.0,wildbench_llama2,wildbench_240612,[],holistic | |
| 110,qwen1.5_7b_chat,-0.2,wildbench_llama2,wildbench_240612,[],holistic | |
| 111,phi_3_medium_128k,-3.6,wildbench_llama2,wildbench_240612,[],holistic | |
| 112,gpt_3.5_turbo_0125,-4.1,wildbench_llama2,wildbench_240612,[],holistic | |
| 113,llama_2_7b_chat,-27.8,wildbench_llama2,wildbench_240612,[],holistic | |
| 114,gemma_7b_it,-36.8,wildbench_llama2,wildbench_240612,[],holistic | |
| 115,gemma_2b_it,-60.8,wildbench_llama2,wildbench_240612,[],holistic | |
| 116,gpt_4o_0513,65.3,wb_score,wildbench_240612,[],holistic | |
| 117,gpt_4_turbo_0409,64.7,wb_score,wildbench_240612,[],holistic | |
| 118,gpt_4_turbo_0125,63.3,wb_score,wildbench_240612,[],holistic | |
| 119,gemini_1.5_pro,55.7,wb_score,wildbench_240612,[],holistic | |
| 120,llama_3_70b_inst,60.4,wb_score,wildbench_240612,[],holistic | |
| 121,claude_3_opus,63.1,wb_score,wildbench_240612,[],holistic | |
| 122,gemini_1.5_flash,53.1,wb_score,wildbench_240612,[],holistic | |
| 123,yi_1.5_34b_chat,57.8,wb_score,wildbench_240612,[],holistic | |
| 124,llama3_inst_8b_simpo,53.9,wb_score,wildbench_240612,[],holistic | |
| 125,claude_3_sonnet,55.5,wb_score,wildbench_240612,[],holistic | |
| 126,qwen1.5_72b_chat,56.5,wb_score,wildbench_240612,[],holistic | |
| 127,command_r_plus,51.4,wb_score,wildbench_240612,[],holistic | |
| 128,claude_3_haiku,50.4,wb_score,wildbench_240612,[],holistic | |
| 129,mistral_large,54.2,wb_score,wildbench_240612,[],holistic | |
| 130,starlinglm_7b_beta,46.8,wb_score,wildbench_240612,[],holistic | |
| 131,llama_3_8b_inst,45.7,wb_score,wildbench_240612,[],holistic | |
| 132,command_r,45.7,wb_score,wildbench_240612,[],holistic | |
| 133,mixtral_8x7b_inst,47.8,wb_score,wildbench_240612,[],holistic | |
| 134,dbrx_instruct,48.9,wb_score,wildbench_240612,[],holistic | |
| 135,yi_1.5_6b_chat,39.6,wb_score,wildbench_240612,[],holistic | |
| 136,mistral_7b_inst_v0.2,43.4,wb_score,wildbench_240612,[],holistic | |
| 137,tulu_2_dpo_70b,45.2,wb_score,wildbench_240612,[],holistic | |
| 138,llama_2_70b_chat,39.2,wb_score,wildbench_240612,[],holistic | |
| 139,qwen1.5_7b_chat,40.0,wb_score,wildbench_240612,[],holistic | |
| 140,phi_3_medium_128k,42.1,wb_score,wildbench_240612,[],holistic | |
| 141,gpt_3.5_turbo_0125,42.1,wb_score,wildbench_240612,[],holistic | |
| 142,llama_2_7b_chat,27.6,wb_score,wildbench_240612,[],holistic | |
| 143,gemma_7b_it,23.9,wb_score,wildbench_240612,[],holistic | |
| 144,gemma_2b_it,6.2,wb_score,wildbench_240612,[],holistic | |
| 145,gpt_4o_0513,1293.0,arena_elo,wildbench_240612,[],holistic | |
| 146,gpt_4_turbo_0409,1251.0,arena_elo,wildbench_240612,[],holistic | |
| 147,gpt_4_turbo_0125,1239.0,arena_elo,wildbench_240612,[],holistic | |
| 149,llama_3_70b_inst,1213.0,arena_elo,wildbench_240612,[],holistic | |
| 150,claude_3_opus,1232.0,arena_elo,wildbench_240612,[],holistic | |
| 154,claude_3_sonnet,1187.0,arena_elo,wildbench_240612,[],holistic | |
| 155,qwen1.5_72b_chat,1143.0,arena_elo,wildbench_240612,[],holistic | |
| 156,command_r_plus,1155.0,arena_elo,wildbench_240612,[],holistic | |
| 157,claude_3_haiku,1169.0,arena_elo,wildbench_240612,[],holistic | |
| 158,mistral_large,1158.0,arena_elo,wildbench_240612,[],holistic | |
| 159,starlinglm_7b_beta,1111.0,arena_elo,wildbench_240612,[],holistic | |
| 160,llama_3_8b_inst,1144.0,arena_elo,wildbench_240612,[],holistic | |
| 161,command_r,1106.0,arena_elo,wildbench_240612,[],holistic | |
| 162,mixtral_8x7b_inst,1114.0,arena_elo,wildbench_240612,[],holistic | |
| 163,dbrx_instruct,1106.0,arena_elo,wildbench_240612,[],holistic | |
| 165,mistral_7b_inst_v0.2,1071.0,arena_elo,wildbench_240612,[],holistic | |
| 166,tulu_2_dpo_70b,1099.0,arena_elo,wildbench_240612,[],holistic | |
| 167,llama_2_70b_chat,1070.0,arena_elo,wildbench_240612,[],holistic | |
| 168,qwen1.5_7b_chat,1059.0,arena_elo,wildbench_240612,[],holistic | |
| 170,gpt_3.5_turbo_0125,1105.0,arena_elo,wildbench_240612,[],holistic | |
| 171,llama_2_7b_chat,1012.0,arena_elo,wildbench_240612,[],holistic | |
| 172,gemma_7b_it,1047.0,arena_elo,wildbench_240612,[],holistic | |
| 173,gemma_2b_it,980.0,arena_elo,wildbench_240612,[],holistic | |
| 175,gpt_4_turbo_0409,82.6,arena_hard,wildbench_240612,[],holistic | |
| 176,gpt_4_turbo_0125,78.0,arena_hard,wildbench_240612,[],holistic | |
| 178,llama_3_70b_inst,41.1,arena_hard,wildbench_240612,[],holistic | |
| 179,claude_3_opus,60.4,arena_hard,wildbench_240612,[],holistic | |
| 182,llama3_inst_8b_simpo,33.8,arena_hard,wildbench_240612,[],holistic | |
| 183,claude_3_sonnet,46.8,arena_hard,wildbench_240612,[],holistic | |
| 184,qwen1.5_72b_chat,36.1,arena_hard,wildbench_240612,[],holistic | |
| 185,command_r_plus,33.1,arena_hard,wildbench_240612,[],holistic | |
| 186,claude_3_haiku,41.5,arena_hard,wildbench_240612,[],holistic | |
| 187,mistral_large,37.7,arena_hard,wildbench_240612,[],holistic | |
| 188,starlinglm_7b_beta,23.0,arena_hard,wildbench_240612,[],holistic | |
| 189,llama_3_8b_inst,20.6,arena_hard,wildbench_240612,[],holistic | |
| 190,command_r,17.0,arena_hard,wildbench_240612,[],holistic | |
| 191,mixtral_8x7b_inst,23.4,arena_hard,wildbench_240612,[],holistic | |
| 192,dbrx_instruct,23.9,arena_hard,wildbench_240612,[],holistic | |
| 195,tulu_2_dpo_70b,15.0,arena_hard,wildbench_240612,[],holistic | |
| 196,llama_2_70b_chat,11.6,arena_hard,wildbench_240612,[],holistic | |
| 199,gpt_3.5_turbo_0125,23.3,arena_hard,wildbench_240612,[],holistic | |
| 200,llama_2_7b_chat,4.6,arena_hard,wildbench_240612,[],holistic | |
| 201,gemma_7b_it,7.5,arena_hard,wildbench_240612,[],holistic | |
| 202,gemma_2b_it,3.0,arena_hard,wildbench_240612,[],holistic | |
| 203,gpt_4o_0513,57.5,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 204,gpt_4_turbo_0409,55.0,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 207,llama_3_70b_inst,34.4,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 208,claude_3_opus,40.5,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 211,llama3_inst_8b_simpo,44.7,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 212,claude_3_sonnet,34.9,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 213,qwen1.5_72b_chat,36.6,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 216,mistral_large,32.7,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 218,llama_3_8b_inst,22.9,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 220,mixtral_8x7b_inst,23.7,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 221,dbrx_instruct,25.4,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 223,mistral_7b_inst_v0.2,17.1,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 224,tulu_2_dpo_70b,21.2,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 225,llama_2_70b_chat,14.7,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 226,qwen1.5_7b_chat,14.7,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 229,llama_2_7b_chat,5.4,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 230,gemma_7b_it,10.4,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 231,gemma_2b_it,5.4,alpacaeval2_lc,wildbench_240612,[],holistic | |
| 232,gpt_4o_0513,51.3,alpacav2,wildbench_240612,[],holistic | |
| 233,gpt_4_turbo_0409,46.1,alpacav2,wildbench_240612,[],holistic | |
| 236,llama_3_70b_inst,33.2,alpacav2,wildbench_240612,[],holistic | |
| 237,claude_3_opus,29.1,alpacav2,wildbench_240612,[],holistic | |
| 240,llama3_inst_8b_simpo,40.5,alpacav2,wildbench_240612,[],holistic | |
| 241,claude_3_sonnet,25.6,alpacav2,wildbench_240612,[],holistic | |
| 242,qwen1.5_72b_chat,26.5,alpacav2,wildbench_240612,[],holistic | |
| 245,mistral_large,21.4,alpacav2,wildbench_240612,[],holistic | |
| 247,llama_3_8b_inst,22.6,alpacav2,wildbench_240612,[],holistic | |
| 249,mixtral_8x7b_inst,18.3,alpacav2,wildbench_240612,[],holistic | |
| 250,dbrx_instruct,18.4,alpacav2,wildbench_240612,[],holistic | |
| 252,mistral_7b_inst_v0.2,14.7,alpacav2,wildbench_240612,[],holistic | |
| 253,tulu_2_dpo_70b,16.0,alpacav2,wildbench_240612,[],holistic | |
| 254,llama_2_70b_chat,13.9,alpacav2,wildbench_240612,[],holistic | |
| 255,qwen1.5_7b_chat,11.8,alpacav2,wildbench_240612,[],holistic | |
| 258,llama_2_7b_chat,5.0,alpacav2,wildbench_240612,[],holistic | |
| 259,gemma_7b_it,6.9,alpacav2,wildbench_240612,[],holistic | |
| 260,gemma_2b_it,3.4,alpacav2,wildbench_240612,[],holistic | |
| 0,gpt_4,4.41,agentbench_overall,agentbench_240720,[],agent | |
| 1,claude_v1.3,2.77,agentbench_overall,agentbench_240720,[],agent | |
| 2,gpt_3.5_turbo,2.55,agentbench_overall,agentbench_240720,[],agent | |
| 3,text_davinci_003,2.1,agentbench_overall,agentbench_240720,[],agent | |
| 4,claude_instant_v1.1,1.9,agentbench_overall,agentbench_240720,[],agent | |
| 5,text_davinci_002,1.46,agentbench_overall,agentbench_240720,[],agent | |
| 6,text_bison_001,1.39,agentbench_overall,agentbench_240720,[],agent | |
| 7,chatglm2_v0.2,1.31,agentbench_overall,agentbench_240720,[],agent | |
| 8,openchat_v3.2,1.15,agentbench_overall,agentbench_240720,[],agent | |
| 9,wizardlm_30b,0.83,agentbench_overall,agentbench_240720,[],agent | |
| 10,vicuna_13b,0.62,agentbench_overall,agentbench_240720,[],agent | |
| 11,wizardlm_13b,0.59,agentbench_overall,agentbench_240720,[],agent | |
| 12,llama2_13b_chat,0.55,agentbench_overall,agentbench_240720,[],agent | |
| 13,codegeex2_6b,0.53,agentbench_overall,agentbench_240720,[],agent | |
| 14,openchat_8192,0.51,agentbench_overall,agentbench_240720,[],agent | |
| 15,baichuan_13b_chat,0.36,agentbench_overall,agentbench_240720,[],agent | |
| 16,koala_13b,0.34,agentbench_overall,agentbench_240720,[],agent | |
| 17,llama2_7b_chat,0.31,agentbench_overall,agentbench_240720,[],agent | |
| 18,chatglm_6b,0.31,agentbench_overall,agentbench_240720,[],agent | |
| 19,vicuna_7b,0.24,agentbench_overall,agentbench_240720,[],agent | |
| 20,internlm_chat_7b,0.23,agentbench_overall,agentbench_240720,[],agent | |
| 21,baichuan_7b,0.22,agentbench_overall,agentbench_240720,[],agent | |
| 22,wizardcoder,0.21,agentbench_overall,agentbench_240720,[],agent | |
| 23,dolly_v2_12b,0.15,agentbench_overall,agentbench_240720,[],agent | |
| 24,oasst_sft_4_pythia_12b,0.07,agentbench_overall,agentbench_240720,[],agent | |
| 25,gpt_4,36.81,agentbench_os,agentbench_240720,[],agent | |
| 26,claude_v1.3,13.19,agentbench_os,agentbench_240720,[],agent | |
| 27,gpt_3.5_turbo,32.64,agentbench_os,agentbench_240720,[],agent | |
| 28,text_davinci_003,22.92,agentbench_os,agentbench_240720,[],agent | |
| 29,claude_instant_v1.1,14.58,agentbench_os,agentbench_240720,[],agent | |
| 30,text_davinci_002,4.86,agentbench_os,agentbench_240720,[],agent | |
| 31,text_bison_001,4.17,agentbench_os,agentbench_240720,[],agent | |
| 32,chatglm2_v0.2,14.58,agentbench_os,agentbench_240720,[],agent | |
| 33,openchat_v3.2,9.72,agentbench_os,agentbench_240720,[],agent | |
| 34,wizardlm_30b,14.58,agentbench_os,agentbench_240720,[],agent | |
| 35,vicuna_13b,8.33,agentbench_os,agentbench_240720,[],agent | |
| 36,wizardlm_13b,9.72,agentbench_os,agentbench_240720,[],agent | |
| 37,llama2_13b_chat,10.42,agentbench_os,agentbench_240720,[],agent | |
| 38,codegeex2_6b,12.5,agentbench_os,agentbench_240720,[],agent | |
| 39,openchat_8192,10.42,agentbench_os,agentbench_240720,[],agent | |
| 40,baichuan_13b_chat,11.81,agentbench_os,agentbench_240720,[],agent | |
| 41,koala_13b,2.78,agentbench_os,agentbench_240720,[],agent | |
| 42,llama2_7b_chat,10.42,agentbench_os,agentbench_240720,[],agent | |
| 43,chatglm_6b,4.86,agentbench_os,agentbench_240720,[],agent | |
| 44,vicuna_7b,6.25,agentbench_os,agentbench_240720,[],agent | |
| 45,internlm_chat_7b,3.47,agentbench_os,agentbench_240720,[],agent | |
| 46,baichuan_7b,4.17,agentbench_os,agentbench_240720,[],agent | |
| 47,wizardcoder,3.47,agentbench_os,agentbench_240720,[],agent | |
| 48,dolly_v2_12b,0.0,agentbench_os,agentbench_240720,[],agent | |
| 49,oasst_sft_4_pythia_12b,2.78,agentbench_os,agentbench_240720,[],agent | |
| 50,gpt_4,33.67,agentbench_db,agentbench_240720,[],agent | |
| 51,claude_v1.3,16.75,agentbench_db,agentbench_240720,[],agent | |
| 52,gpt_3.5_turbo,15.0,agentbench_db,agentbench_240720,[],agent | |
| 53,text_davinci_003,16.33,agentbench_db,agentbench_240720,[],agent | |
| 54,claude_instant_v1.1,8.0,agentbench_db,agentbench_240720,[],agent | |
| 55,text_davinci_002,13.67,agentbench_db,agentbench_240720,[],agent | |
| 56,text_bison_001,12.75,agentbench_db,agentbench_240720,[],agent | |
| 57,chatglm2_v0.2,13.67,agentbench_db,agentbench_240720,[],agent | |
| 58,openchat_v3.2,5.33,agentbench_db,agentbench_240720,[],agent | |
| 59,wizardlm_30b,12.67,agentbench_db,agentbench_240720,[],agent | |
| 60,vicuna_13b,11.33,agentbench_db,agentbench_240720,[],agent | |
| 61,wizardlm_13b,13.0,agentbench_db,agentbench_240720,[],agent | |
| 62,llama2_13b_chat,4.5,agentbench_db,agentbench_240720,[],agent | |
| 63,codegeex2_6b,6.5,agentbench_db,agentbench_240720,[],agent | |
| 64,openchat_8192,2.67,agentbench_db,agentbench_240720,[],agent | |
| 65,baichuan_13b_chat,3.0,agentbench_db,agentbench_240720,[],agent | |
| 66,koala_13b,5.33,agentbench_db,agentbench_240720,[],agent | |
| 67,llama2_7b_chat,2.75,agentbench_db,agentbench_240720,[],agent | |
| 68,chatglm_6b,0.33,agentbench_db,agentbench_240720,[],agent | |
| 69,vicuna_7b,3.33,agentbench_db,agentbench_240720,[],agent | |
| 70,internlm_chat_7b,6.33,agentbench_db,agentbench_240720,[],agent | |
| 71,baichuan_7b,0.0,agentbench_db,agentbench_240720,[],agent | |
| 72,wizardcoder,0.0,agentbench_db,agentbench_240720,[],agent | |
| 73,dolly_v2_12b,0.0,agentbench_db,agentbench_240720,[],agent | |
| 74,oasst_sft_4_pythia_12b,0.0,agentbench_db,agentbench_240720,[],agent | |
| 75,gpt_4,52.14,agentbench_kg,agentbench_240720,[],agent | |
| 76,claude_v1.3,36.22,agentbench_kg,agentbench_240720,[],agent | |
| 77,gpt_3.5_turbo,27.2,agentbench_kg,agentbench_240720,[],agent | |
| 78,text_davinci_003,30.82,agentbench_kg,agentbench_240720,[],agent | |
| 79,claude_instant_v1.1,29.67,agentbench_kg,agentbench_240720,[],agent | |
| 80,text_davinci_002,18.87,agentbench_kg,agentbench_240720,[],agent | |
| 81,text_bison_001,17.12,agentbench_kg,agentbench_240720,[],agent | |
| 82,chatglm2_v0.2,6.85,agentbench_kg,agentbench_240720,[],agent | |
| 83,openchat_v3.2,6.84,agentbench_kg,agentbench_240720,[],agent | |
| 84,wizardlm_30b,2.33,agentbench_kg,agentbench_240720,[],agent | |
| 85,vicuna_13b,1.24,agentbench_kg,agentbench_240720,[],agent | |
| 86,wizardlm_13b,0.44,agentbench_kg,agentbench_240720,[],agent | |
| 87,llama2_13b_chat,3.11,agentbench_kg,agentbench_240720,[],agent | |
| 88,codegeex2_6b,6.35,agentbench_kg,agentbench_240720,[],agent | |
| 89,openchat_8192,0.59,agentbench_kg,agentbench_240720,[],agent | |
| 90,baichuan_13b_chat,6.27,agentbench_kg,agentbench_240720,[],agent | |
| 91,koala_13b,0.0,agentbench_kg,agentbench_240720,[],agent | |
| 92,llama2_7b_chat,1.89,agentbench_kg,agentbench_240720,[],agent | |
| 93,chatglm_6b,0.0,agentbench_kg,agentbench_240720,[],agent | |
| 94,vicuna_7b,0.0,agentbench_kg,agentbench_240720,[],agent | |
| 95,internlm_chat_7b,0.0,agentbench_kg,agentbench_240720,[],agent | |
| 96,baichuan_7b,0.46,agentbench_kg,agentbench_240720,[],agent | |
| 97,wizardcoder,2.78,agentbench_kg,agentbench_240720,[],agent | |
| 98,dolly_v2_12b,0.0,agentbench_kg,agentbench_240720,[],agent | |
| 99,oasst_sft_4_pythia_12b,0.0,agentbench_kg,agentbench_240720,[],agent | |
| 100,gpt_4,50.0,agentbench_dcg,agentbench_240720,[],agent | |
| 101,claude_v1.3,30.0,agentbench_dcg,agentbench_240720,[],agent | |
| 102,gpt_3.5_turbo,30.0,agentbench_dcg,agentbench_240720,[],agent | |
| 103,text_davinci_003,15.0,agentbench_dcg,agentbench_240720,[],agent | |
| 104,claude_instant_v1.1,35.0,agentbench_dcg,agentbench_240720,[],agent | |
| 105,text_davinci_002,25.0,agentbench_dcg,agentbench_240720,[],agent | |
| 106,text_bison_001,20.0,agentbench_dcg,agentbench_240720,[],agent | |
| 107,chatglm2_v0.2,10.0,agentbench_dcg,agentbench_240720,[],agent | |
| 108,openchat_v3.2,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 109,wizardlm_30b,10.0,agentbench_dcg,agentbench_240720,[],agent | |
| 110,vicuna_13b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 111,wizardlm_13b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 112,llama2_13b_chat,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 113,codegeex2_6b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 114,openchat_8192,10.0,agentbench_dcg,agentbench_240720,[],agent | |
| 115,baichuan_13b_chat,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 116,koala_13b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 117,llama2_7b_chat,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 118,chatglm_6b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 119,vicuna_7b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 120,internlm_chat_7b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 121,baichuan_7b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 122,wizardcoder,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 123,dolly_v2_12b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 124,oasst_sft_4_pythia_12b,0.0,agentbench_dcg,agentbench_240720,[],agent | |
| 125,gpt_4,17.6,agentbench_ltp,agentbench_240720,[],agent | |
| 126,claude_v1.3,6.39,agentbench_ltp,agentbench_240720,[],agent | |
| 127,gpt_3.5_turbo,14.85,agentbench_ltp,agentbench_240720,[],agent | |
| 128,text_davinci_003,5.21,agentbench_ltp,agentbench_240720,[],agent | |
| 129,claude_instant_v1.1,6.08,agentbench_ltp,agentbench_240720,[],agent | |
| 130,text_davinci_002,2.32,agentbench_ltp,agentbench_240720,[],agent | |
| 131,text_bison_001,0.12,agentbench_ltp,agentbench_240720,[],agent | |
| 132,chatglm2_v0.2,12.62,agentbench_ltp,agentbench_240720,[],agent | |
| 133,openchat_v3.2,9.54,agentbench_ltp,agentbench_240720,[],agent | |
| 134,wizardlm_30b,4.47,agentbench_ltp,agentbench_240720,[],agent | |
| 135,vicuna_13b,7.97,agentbench_ltp,agentbench_240720,[],agent | |
| 136,wizardlm_13b,4.06,agentbench_ltp,agentbench_240720,[],agent | |
| 137,llama2_13b_chat,3.69,agentbench_ltp,agentbench_240720,[],agent | |
| 138,codegeex2_6b,1.94,agentbench_ltp,agentbench_240720,[],agent | |
| 139,openchat_8192,0.0,agentbench_ltp,agentbench_240720,[],agent | |
| 140,baichuan_13b_chat,1.3,agentbench_ltp,agentbench_240720,[],agent | |
| 141,koala_13b,2.46,agentbench_ltp,agentbench_240720,[],agent | |
| 142,llama2_7b_chat,2.1,agentbench_ltp,agentbench_240720,[],agent | |
| 143,chatglm_6b,6.69,agentbench_ltp,agentbench_240720,[],agent | |
| 144,vicuna_7b,3.08,agentbench_ltp,agentbench_240720,[],agent | |
| 145,internlm_chat_7b,3.41,agentbench_ltp,agentbench_240720,[],agent | |
| 146,baichuan_7b,2.29,agentbench_ltp,agentbench_240720,[],agent | |
| 147,wizardcoder,1.32,agentbench_ltp,agentbench_240720,[],agent | |
| 148,dolly_v2_12b,3.36,agentbench_ltp,agentbench_240720,[],agent | |
| 149,oasst_sft_4_pythia_12b,1.48,agentbench_ltp,agentbench_240720,[],agent | |
| 150,gpt_4,78.0,agentbench_hh,agentbench_240720,[],agent | |
| 151,claude_v1.3,52.0,agentbench_hh,agentbench_240720,[],agent | |
| 152,gpt_3.5_turbo,14.0,agentbench_hh,agentbench_240720,[],agent | |
| 153,text_davinci_003,20.0,agentbench_hh,agentbench_240720,[],agent | |
| 154,claude_instant_v1.1,26.0,agentbench_hh,agentbench_240720,[],agent | |
| 155,text_davinci_002,14.0,agentbench_hh,agentbench_240720,[],agent | |
| 156,text_bison_001,4.0,agentbench_hh,agentbench_240720,[],agent | |
| 157,chatglm2_v0.2,6.0,agentbench_hh,agentbench_240720,[],agent | |
| 158,openchat_v3.2,8.0,agentbench_hh,agentbench_240720,[],agent | |
| 159,wizardlm_30b,6.0,agentbench_hh,agentbench_240720,[],agent | |
| 160,vicuna_13b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 161,wizardlm_13b,6.0,agentbench_hh,agentbench_240720,[],agent | |
| 162,llama2_13b_chat,2.0,agentbench_hh,agentbench_240720,[],agent | |
| 163,codegeex2_6b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 164,openchat_8192,4.0,agentbench_hh,agentbench_240720,[],agent | |
| 165,baichuan_13b_chat,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 166,koala_13b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 167,llama2_7b_chat,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 168,chatglm_6b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 169,vicuna_7b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 170,internlm_chat_7b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 171,baichuan_7b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 172,wizardcoder,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 173,dolly_v2_12b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 174,oasst_sft_4_pythia_12b,0.0,agentbench_hh,agentbench_240720,[],agent | |
| 175,gpt_4,58.6,agentbench_ws,agentbench_240720,[],agent | |
| 176,claude_v1.3,59.26,agentbench_ws,agentbench_240720,[],agent | |
| 177,gpt_3.5_turbo,67.21,agentbench_ws,agentbench_240720,[],agent | |
| 178,text_davinci_003,61.43,agentbench_ws,agentbench_240720,[],agent | |
| 179,claude_instant_v1.1,44.22,agentbench_ws,agentbench_240720,[],agent | |
| 180,text_davinci_002,60.15,agentbench_ws,agentbench_240720,[],agent | |
| 181,text_bison_001,46.06,agentbench_ws,agentbench_240720,[],agent | |
| 182,chatglm2_v0.2,19.35,agentbench_ws,agentbench_240720,[],agent | |
| 183,openchat_v3.2,50.17,agentbench_ws,agentbench_240720,[],agent | |
| 184,wizardlm_30b,10.6,agentbench_ws,agentbench_240720,[],agent | |
| 185,vicuna_13b,12.57,agentbench_ws,agentbench_240720,[],agent | |
| 186,wizardlm_13b,1.2,agentbench_ws,agentbench_240720,[],agent | |
| 187,llama2_13b_chat,3.12,agentbench_ws,agentbench_240720,[],agent | |
| 188,codegeex2_6b,11.8,agentbench_ws,agentbench_240720,[],agent | |
| 189,openchat_8192,6.68,agentbench_ws,agentbench_240720,[],agent | |
| 190,baichuan_13b_chat,5.74,agentbench_ws,agentbench_240720,[],agent | |
| 191,koala_13b,5.96,agentbench_ws,agentbench_240720,[],agent | |
| 192,llama2_7b_chat,2.22,agentbench_ws,agentbench_240720,[],agent | |
| 193,chatglm_6b,0.5,agentbench_ws,agentbench_240720,[],agent | |
| 194,vicuna_7b,6.4,agentbench_ws,agentbench_240720,[],agent | |
| 195,internlm_chat_7b,0.0,agentbench_ws,agentbench_240720,[],agent | |
| 196,baichuan_7b,2.84,agentbench_ws,agentbench_240720,[],agent | |
| 197,wizardcoder,0.0,agentbench_ws,agentbench_240720,[],agent | |
| 198,dolly_v2_12b,0.38,agentbench_ws,agentbench_240720,[],agent | |
| 199,oasst_sft_4_pythia_12b,0.0,agentbench_ws,agentbench_240720,[],agent | |
| 200,gpt_4,22.59,agentbench_wb,agentbench_240720,[],agent | |
| 201,claude_v1.3,20.97,agentbench_wb,agentbench_240720,[],agent | |
| 202,gpt_3.5_turbo,15.69,agentbench_wb,agentbench_240720,[],agent | |
| 203,text_davinci_003,15.52,agentbench_wb,agentbench_240720,[],agent | |
| 204,claude_instant_v1.1,0.77,agentbench_wb,agentbench_240720,[],agent | |
| 205,text_davinci_002,1.11,agentbench_wb,agentbench_240720,[],agent | |
| 206,text_bison_001,20.46,agentbench_wb,agentbench_240720,[],agent | |
| 207,chatglm2_v0.2,12.87,agentbench_wb,agentbench_240720,[],agent | |
| 208,openchat_v3.2,14.92,agentbench_wb,agentbench_240720,[],agent | |
| 209,wizardlm_30b,3.07,agentbench_wb,agentbench_240720,[],agent | |
| 210,vicuna_13b,3.92,agentbench_wb,agentbench_240720,[],agent | |
| 211,wizardlm_13b,5.8,agentbench_wb,agentbench_240720,[],agent | |
| 212,llama2_13b_chat,11.94,agentbench_wb,agentbench_240720,[],agent | |
| 213,codegeex2_6b,5.37,agentbench_wb,agentbench_240720,[],agent | |
| 214,openchat_8192,7.08,agentbench_wb,agentbench_240720,[],agent | |
| 215,baichuan_13b_chat,2.3,agentbench_wb,agentbench_240720,[],agent | |
| 216,koala_13b,8.1,agentbench_wb,agentbench_240720,[],agent | |
| 217,llama2_7b_chat,3.75,agentbench_wb,agentbench_240720,[],agent | |
| 218,chatglm_6b,4.94,agentbench_wb,agentbench_240720,[],agent | |
| 219,vicuna_7b,0.17,agentbench_wb,agentbench_240720,[],agent | |
| 220,internlm_chat_7b,0.17,agentbench_wb,agentbench_240720,[],agent | |
| 221,baichuan_7b,5.8,agentbench_wb,agentbench_240720,[],agent | |
| 222,wizardcoder,6.65,agentbench_wb,agentbench_240720,[],agent | |
| 223,dolly_v2_12b,4.43,agentbench_wb,agentbench_240720,[],agent | |
| 224,oasst_sft_4_pythia_12b,0.34,agentbench_wb,agentbench_240720,[],agent | |
| 0,pythia_1b,31.4,arc_c,olmes_260624,[],reasoning | |
| 1,olmo_1b,38.6,arc_c,olmes_260624,[],reasoning | |
| 2,tinyllama_1.1b,38.1,arc_c,olmes_260624,[],reasoning | |
| 3,pythia_6.7b,44.6,arc_c,olmes_260624,[],reasoning | |
| 4,rpj_incite_7b,45.3,arc_c,olmes_260624,[],reasoning | |
| 5,stablelm2_1.6b,50.6,arc_c,olmes_260624,[],reasoning | |
| 6,olmo_7b,46.4,arc_c,olmes_260624,[],reasoning | |
| 7,mpt_7b,45.7,arc_c,olmes_260624,[],reasoning | |
| 8,falcon_7b,49.7,arc_c,olmes_260624,[],reasoning | |
| 9,llama2_7b,54.2,arc_c,olmes_260624,[],reasoning | |
| 10,llama2_13b,67.3,arc_c,olmes_260624,[],reasoning | |
| 11,olmo_1.7_7b,66.9,arc_c,olmes_260624,[],reasoning | |
| 12,llama3_8b,79.3,arc_c,olmes_260624,[],reasoning | |
| 13,mistral_7b_v0.1,78.6,arc_c,olmes_260624,[],reasoning | |
| 14,llama3_70b,93.7,arc_c,olmes_260624,[],reasoning | |
| 15,pythia_1b,63.4,arc_e,olmes_260624,[],reasoning | |
| 16,olmo_1b,68.3,arc_e,olmes_260624,[],reasoning | |
| 17,tinyllama_1.1b,69.5,arc_e,olmes_260624,[],reasoning | |
| 18,pythia_6.7b,72.6,arc_e,olmes_260624,[],reasoning | |
| 19,rpj_incite_7b,78.8,arc_e,olmes_260624,[],reasoning | |
| 20,stablelm2_1.6b,75.3,arc_e,olmes_260624,[],reasoning | |
| 21,olmo_7b,78.9,arc_e,olmes_260624,[],reasoning | |
| 22,mpt_7b,78.0,arc_e,olmes_260624,[],reasoning | |
| 23,falcon_7b,80.6,arc_e,olmes_260624,[],reasoning | |
| 24,llama2_7b,84.0,arc_e,olmes_260624,[],reasoning | |
| 25,llama2_13b,85.9,arc_e,olmes_260624,[],reasoning | |
| 26,olmo_1.7_7b,83.6,arc_e,olmes_260624,[],reasoning | |
| 27,llama3_8b,92.4,arc_e,olmes_260624,[],reasoning | |
| 28,mistral_7b_v0.1,90.8,arc_e,olmes_260624,[],reasoning | |
| 29,llama3_70b,97.7,arc_e,olmes_260624,[],reasoning | |
| 30,pythia_1b,56.8,boolq,olmes_260624,[],knowledge | |
| 31,olmo_1b,51.3,boolq,olmes_260624,[],knowledge | |
| 32,tinyllama_1.1b,63.6,boolq,olmes_260624,[],knowledge | |
| 33,pythia_6.7b,68.7,boolq,olmes_260624,[],knowledge | |
| 34,rpj_incite_7b,72.0,boolq,olmes_260624,[],knowledge | |
| 35,stablelm2_1.6b,82.3,boolq,olmes_260624,[],knowledge | |
| 36,olmo_7b,78.7,boolq,olmes_260624,[],knowledge | |
| 37,mpt_7b,82.4,boolq,olmes_260624,[],knowledge | |
| 38,falcon_7b,78.2,boolq,olmes_260624,[],knowledge | |
| 39,llama2_7b,86.1,boolq,olmes_260624,[],knowledge | |
| 40,llama2_13b,86.7,boolq,olmes_260624,[],knowledge | |
| 41,olmo_1.7_7b,85.9,boolq,olmes_260624,[],knowledge | |
| 42,llama3_8b,87.5,boolq,olmes_260624,[],knowledge | |
| 43,mistral_7b_v0.1,89.3,boolq,olmes_260624,[],knowledge | |
| 44,llama3_70b,91.7,boolq,olmes_260624,[],knowledge | |
| 45,pythia_1b,50.9,csqa,olmes_260624,[],knowledge | |
| 46,olmo_1b,62.2,csqa,olmes_260624,[],knowledge | |
| 47,tinyllama_1.1b,61.1,csqa,olmes_260624,[],knowledge | |
| 48,pythia_6.7b,62.1,csqa,olmes_260624,[],knowledge | |
| 49,rpj_incite_7b,69.2,csqa,olmes_260624,[],knowledge | |
| 50,stablelm2_1.6b,70.4,csqa,olmes_260624,[],knowledge | |
| 51,olmo_7b,70.8,csqa,olmes_260624,[],knowledge | |
| 52,mpt_7b,70.9,csqa,olmes_260624,[],knowledge | |
| 53,falcon_7b,73.4,csqa,olmes_260624,[],knowledge | |
| 54,llama2_7b,74.2,csqa,olmes_260624,[],knowledge | |
| 55,llama2_13b,74.0,csqa,olmes_260624,[],knowledge | |
| 56,olmo_1.7_7b,85.8,csqa,olmes_260624,[],knowledge | |
| 57,llama3_8b,73.9,csqa,olmes_260624,[],knowledge | |
| 58,mistral_7b_v0.1,72.4,csqa,olmes_260624,[],knowledge | |
| 59,llama3_70b,83.2,csqa,olmes_260624,[],knowledge | |
| 60,pythia_1b,48.0,hellaswag,olmes_260624,[],reasoning | |
| 61,olmo_1b,65.2,hellaswag,olmes_260624,[],reasoning | |
| 62,tinyllama_1.1b,60.8,hellaswag,olmes_260624,[],reasoning | |
| 63,pythia_6.7b,66.1,hellaswag,olmes_260624,[],reasoning | |
| 64,rpj_incite_7b,72.8,hellaswag,olmes_260624,[],reasoning | |
| 65,stablelm2_1.6b,70.3,hellaswag,olmes_260624,[],reasoning | |
| 66,olmo_7b,78.1,hellaswag,olmes_260624,[],reasoning | |
| 67,mpt_7b,79.6,hellaswag,olmes_260624,[],reasoning | |
| 68,falcon_7b,79.0,hellaswag,olmes_260624,[],reasoning | |
| 69,llama2_7b,78.9,hellaswag,olmes_260624,[],reasoning | |
| 70,llama2_13b,83.9,hellaswag,olmes_260624,[],reasoning | |
| 71,olmo_1.7_7b,80.1,hellaswag,olmes_260624,[],reasoning | |
| 72,llama3_8b,81.8,hellaswag,olmes_260624,[],reasoning | |
| 73,mistral_7b_v0.1,83.0,hellaswag,olmes_260624,[],reasoning | |
| 74,llama3_70b,89.5,hellaswag,olmes_260624,[],reasoning | |
| 75,pythia_1b,31.1,mmlu,olmes_260624,[],knowledge | |
| 76,olmo_1b,33.4,mmlu,olmes_260624,[],knowledge | |
| 77,tinyllama_1.1b,33.6,mmlu,olmes_260624,[],knowledge | |
| 78,pythia_6.7b,37.7,mmlu,olmes_260624,[],knowledge | |
| 79,rpj_incite_7b,40.1,mmlu,olmes_260624,[],knowledge | |
| 80,stablelm2_1.6b,40.4,mmlu,olmes_260624,[],knowledge | |
| 81,olmo_7b,40.5,mmlu,olmes_260624,[],knowledge | |
| 82,mpt_7b,40.6,mmlu,olmes_260624,[],knowledge | |
| 83,falcon_7b,42.1,mmlu,olmes_260624,[],knowledge | |
| 84,llama2_7b,46.2,mmlu,olmes_260624,[],knowledge | |
| 85,llama2_13b,55.8,mmlu,olmes_260624,[],knowledge | |
| 86,olmo_1.7_7b,54.4,mmlu,olmes_260624,[],knowledge | |
| 87,llama3_8b,66.6,mmlu,olmes_260624,[],knowledge | |
| 88,mistral_7b_v0.1,64.0,mmlu,olmes_260624,[],knowledge | |
| 89,llama3_70b,79.8,mmlu,olmes_260624,[],knowledge | |
| 90,pythia_1b,40.4,openbookqa,olmes_260624,[],knowledge | |
| 91,olmo_1b,47.6,openbookqa,olmes_260624,[],knowledge | |
| 92,tinyllama_1.1b,45.0,openbookqa,olmes_260624,[],knowledge | |
| 93,pythia_6.7b,50.4,openbookqa,olmes_260624,[],knowledge | |
| 94,rpj_incite_7b,49.0,openbookqa,olmes_260624,[],knowledge | |
| 95,stablelm2_1.6b,56.6,openbookqa,olmes_260624,[],knowledge | |
| 96,olmo_7b,55.8,openbookqa,olmes_260624,[],knowledge | |
| 97,mpt_7b,52.4,openbookqa,olmes_260624,[],knowledge | |
| 98,falcon_7b,55.2,openbookqa,olmes_260624,[],knowledge | |
| 99,llama2_7b,57.8,openbookqa,olmes_260624,[],knowledge | |
| 100,llama2_13b,65.4,openbookqa,olmes_260624,[],knowledge | |
| 101,olmo_1.7_7b,68.6,openbookqa,olmes_260624,[],knowledge | |
| 102,llama3_8b,77.2,openbookqa,olmes_260624,[],knowledge | |
| 103,mistral_7b_v0.1,80.6,openbookqa,olmes_260624,[],knowledge | |
| 104,llama3_70b,93.4,openbookqa,olmes_260624,[],knowledge | |
| 105,pythia_1b,68.9,piqa,olmes_260624,[],reasoning | |
| 106,olmo_1b,74.1,piqa,olmes_260624,[],reasoning | |
| 107,tinyllama_1.1b,71.7,piqa,olmes_260624,[],reasoning | |
| 108,pythia_6.7b,74.9,piqa,olmes_260624,[],reasoning | |
| 109,rpj_incite_7b,75.9,piqa,olmes_260624,[],reasoning | |
| 110,stablelm2_1.6b,75.6,piqa,olmes_260624,[],reasoning | |
| 111,olmo_7b,78.5,piqa,olmes_260624,[],reasoning | |
| 112,mpt_7b,79.2,piqa,olmes_260624,[],reasoning | |
| 113,falcon_7b,79.0,piqa,olmes_260624,[],reasoning | |
| 114,llama2_7b,77.5,piqa,olmes_260624,[],reasoning | |
| 115,llama2_13b,80.2,piqa,olmes_260624,[],reasoning | |
| 116,olmo_1.7_7b,80.3,piqa,olmes_260624,[],reasoning | |
| 117,llama3_8b,81.6,piqa,olmes_260624,[],reasoning | |
| 118,mistral_7b_v0.1,82.8,piqa,olmes_260624,[],reasoning | |
| 119,llama3_70b,91.6,piqa,olmes_260624,[],reasoning | |
| 120,pythia_1b,46.4,siqa,olmes_260624,[],other | |
| 121,olmo_1b,51.5,siqa,olmes_260624,[],other | |
| 122,tinyllama_1.1b,50.4,siqa,olmes_260624,[],other | |
| 123,pythia_6.7b,51.7,siqa,olmes_260624,[],other | |
| 124,rpj_incite_7b,56.6,siqa,olmes_260624,[],other | |
| 125,stablelm2_1.6b,64.3,siqa,olmes_260624,[],other | |
| 126,olmo_7b,56.5,siqa,olmes_260624,[],other | |
| 127,mpt_7b,57.4,siqa,olmes_260624,[],other | |
| 128,falcon_7b,60.1,siqa,olmes_260624,[],other | |
| 129,llama2_7b,59.6,siqa,olmes_260624,[],other | |
| 130,llama2_13b,65.9,siqa,olmes_260624,[],other | |
| 131,olmo_1.7_7b,76.1,siqa,olmes_260624,[],other | |
| 132,llama3_8b,70.2,siqa,olmes_260624,[],other | |
| 133,mistral_7b_v0.1,71.3,siqa,olmes_260624,[],other | |
| 134,llama3_70b,78.9,siqa,olmes_260624,[],other | |
| 135,pythia_1b,52.7,winogrande,olmes_260624,[],reasoning | |
| 136,olmo_1b,59.3,winogrande,olmes_260624,[],reasoning | |
| 137,tinyllama_1.1b,60.1,winogrande,olmes_260624,[],reasoning | |
| 138,pythia_6.7b,62.3,winogrande,olmes_260624,[],reasoning | |
| 139,rpj_incite_7b,68.0,winogrande,olmes_260624,[],reasoning | |
| 140,stablelm2_1.6b,65.7,winogrande,olmes_260624,[],reasoning | |
| 141,olmo_7b,68.5,winogrande,olmes_260624,[],reasoning | |
| 142,mpt_7b,70.2,winogrande,olmes_260624,[],reasoning | |
| 143,falcon_7b,71.3,winogrande,olmes_260624,[],reasoning | |
| 144,llama2_7b,71.7,winogrande,olmes_260624,[],reasoning | |
| 145,llama2_13b,74.9,winogrande,olmes_260624,[],reasoning | |
| 146,olmo_1.7_7b,73.6,winogrande,olmes_260624,[],reasoning | |
| 147,llama3_8b,76.2,winogrande,olmes_260624,[],reasoning | |
| 148,mistral_7b_v0.1,77.9,winogrande,olmes_260624,[],reasoning | |
| 149,llama3_70b,84.1,winogrande,olmes_260624,[],reasoning | |
| 150,pythia_1b,49.0,olmes_average,olmes_260624,[],holistic | |
| 151,olmo_1b,55.1,olmes_average,olmes_260624,[],holistic | |
| 152,tinyllama_1.1b,55.4,olmes_average,olmes_260624,[],holistic | |
| 153,pythia_6.7b,59.1,olmes_average,olmes_260624,[],holistic | |
| 154,rpj_incite_7b,62.8,olmes_average,olmes_260624,[],holistic | |
| 155,stablelm2_1.6b,65.1,olmes_average,olmes_260624,[],holistic | |
| 156,olmo_7b,65.3,olmes_average,olmes_260624,[],holistic | |
| 157,mpt_7b,65.6,olmes_average,olmes_260624,[],holistic | |
| 158,falcon_7b,66.9,olmes_average,olmes_260624,[],holistic | |
| 159,llama2_7b,69.0,olmes_average,olmes_260624,[],holistic | |
| 160,llama2_13b,74.0,olmes_average,olmes_260624,[],holistic | |
| 161,olmo_1.7_7b,75.5,olmes_average,olmes_260624,[],holistic | |
| 162,llama3_8b,78.7,olmes_average,olmes_260624,[],holistic | |
| 163,mistral_7b_v0.1,79.1,olmes_average,olmes_260624,[],holistic | |
| 164,llama3_70b,88.4,olmes_average,olmes_260624,[],holistic | |
| 0,llama_2_70b,0.3753,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 1,llama_3_8b,0.3536,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 2,deepseekmath_instruct,0.353,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 3,gemma_7b,0.3373,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 4,mistral_7b_v0.1,0.3088,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 5,mistral_7b_instruct_v0.2,0.3084,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 6,mistral_7b_v0.2,0.3043,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 7,qwen1.5_7b_chat,0.2906,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 8,yi_6b_chat,0.2884,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 9,yi_6b,0.2651,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 10,mistral_7b_instruct_v0.1,0.2575,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 11,llama_2_13b,0.2534,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 12,llemma_7b,0.2345,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 13,llama_2_7b,0.2032,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 14,gpt_4o,0.7255,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 15,claude_3_opus,0.6845,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 16,gpt_4_turbo,0.6371,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 17,gemini_1.5_flash,0.5912,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 18,yi_large,0.5753,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 19,claude_3_sonnet,0.568,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 20,llama_3_70b_instruct,0.562,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 21,deepseek_v2,0.5481,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 22,phi_3_medium_4k_instruct,0.5348,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 23,llama_3_70b,0.5278,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 24,qwen1.5_72b_chat,0.5162,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 25,mammoth2_8x7b_plus,0.504,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 26,qwen1.5_110b,0.4993,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 27,mammoth2_8b_plus,0.4335,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 28,mixtral_8x7b_instruct_v0.1,0.4327,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 29,phi_3_mini_4k_instruct,0.4317,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 30,yi_34b,0.4303,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 31,mixtral_8x7b_v0.1,0.4103,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 32,llama_3_8b_instruct,0.4098,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 33,mammoth2_7b_plus,0.4085,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 34,qwen1.5_14b_chat,0.3802,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 35,c4ai_command_r_v01,0.379,mmlu_pro,mmlu_pro_240610,[],knowledge | |
| 0,claude_3_5_sonnet_20240620,61.16,livebench_average,livebench_240701,[],holistic | |
| 1,gpt_4o_2024_05_13,54.96,livebench_average,livebench_240701,[],holistic | |
| 2,gpt_4_turbo_2024_04_09,53.0,livebench_average,livebench_240701,[],holistic | |
| 3,gpt_4_1106_preview,52.17,livebench_average,livebench_240701,[],holistic | |
| 4,claude_3_opus_20240229,50.75,livebench_average,livebench_240701,[],holistic | |
| 5,gpt_4_0125_preview,49.39,livebench_average,livebench_240701,[],holistic | |
| 6,deepseek_coder_v2,46.79,livebench_average,livebench_240701,[],holistic | |
| 7,gemini_1.5_pro_api_0514,44.35,livebench_average,livebench_240701,[],holistic | |
| 8,gemma_2_27b_it,41.22,livebench_average,livebench_240701,[],holistic | |
| 9,gemini_1.5_flash_api_0514,40.89,livebench_average,livebench_240701,[],holistic | |
| 10,qwen2_72b_instruct,40.16,livebench_average,livebench_240701,[],holistic | |
| 11,acm_rewrite_qwen2_72b_chat,39.6,livebench_average,livebench_240701,[],holistic | |
| 12,mistral_large_2402,38.92,livebench_average,livebench_240701,[],holistic | |
| 13,deepseek_chat_v2,38.39,livebench_average,livebench_240701,[],holistic | |
| 14,claude_3_sonnet_20240229,38.08,livebench_average,livebench_240701,[],holistic | |
| 15,meta_llama_3_70b_instruct,37.38,livebench_average,livebench_240701,[],holistic | |
| 16,claude_3_haiku_20240307,35.32,livebench_average,livebench_240701,[],holistic | |
| 17,mixtral_8x22b_instruct_v0.1,34.84,livebench_average,livebench_240701,[],holistic | |
| 18,gpt_3.5_turbo_0125,34.43,livebench_average,livebench_240701,[],holistic | |
| 19,gpt_3.5_turbo_1106,34.14,livebench_average,livebench_240701,[],holistic | |
| 20,command_r_plus,32.86,livebench_average,livebench_240701,[],holistic | |
| 21,mistral_small_2402,32.8,livebench_average,livebench_240701,[],holistic | |
| 22,gemma_2_9b_it,31.57,livebench_average,livebench_240701,[],holistic | |
| 23,phi_3_medium_4k_instruct,30.33,livebench_average,livebench_240701,[],holistic | |
| 24,phi_3_medium_128k_instruct,29.64,livebench_average,livebench_240701,[],holistic | |
| 25,deepseek_coder_v2_lite_instruct,29.15,livebench_average,livebench_240701,[],holistic | |
| 26,qwen1.5_110b_chat,28.96,livebench_average,livebench_240701,[],holistic | |
| 27,qwen1.5_72b_chat,28.89,livebench_average,livebench_240701,[],holistic | |
| 28,command_r,27.23,livebench_average,livebench_240701,[],holistic | |
| 29,phi_3_small_128k_instruct,27.19,livebench_average,livebench_240701,[],holistic | |
| 30,meta_llama_3_8b_instruct,26.67,livebench_average,livebench_240701,[],holistic | |
| 31,qwen2_7b_instruct,26.45,livebench_average,livebench_240701,[],holistic | |
| 32,phi_3_small_8k_instruct,26.24,livebench_average,livebench_240701,[],holistic | |
| 33,openhermes_2.5_mistral_7b,23.3,livebench_average,livebench_240701,[],holistic | |
| 34,mixtral_8x7b_instruct_v0.1,22.5,livebench_average,livebench_240701,[],holistic | |
| 35,mistral_7b_instruct_v0.2,19.33,livebench_average,livebench_240701,[],holistic | |
| 36,phi_3_mini_4k_instruct,19.27,livebench_average,livebench_240701,[],holistic | |
| 37,zephyr_7b_alpha,19.22,livebench_average,livebench_240701,[],holistic | |
| 38,phi_3_mini_128k_instruct,18.04,livebench_average,livebench_240701,[],holistic | |
| 39,zephyr_7b_beta,17.32,livebench_average,livebench_240701,[],holistic | |
| 40,deepseek_v2_lite_chat,17.14,livebench_average,livebench_240701,[],holistic | |
| 41,qwen1.5_7b_chat,16.5,livebench_average,livebench_240701,[],holistic | |
| 42,starling_lm_7b_beta,16.44,livebench_average,livebench_240701,[],holistic | |
| 43,vicuna_7b_v1.5_16k,13.71,livebench_average,livebench_240701,[],holistic | |
| 44,vicuna_7b_v1.5,11.73,livebench_average,livebench_240701,[],holistic | |
| 45,qwen1.5_4b_chat,11.13,livebench_average,livebench_240701,[],holistic | |
| 46,llama_2_7b_chat,10.25,livebench_average,livebench_240701,[],holistic | |
| 47,qwen2_1.5b_instruct,9.96,livebench_average,livebench_240701,[],holistic | |
| 48,yi_6b_chat,8.79,livebench_average,livebench_240701,[],holistic | |
| 49,qwen2_0.5b_instruct,6.78,livebench_average,livebench_240701,[],holistic | |
| 50,qwen1.5_1.8b_chat,6.09,livebench_average,livebench_240701,[],holistic | |
| 51,qwen1.5_0.5b_chat,5.26,livebench_average,livebench_240701,[],holistic | |
| 52,claude_3_5_sonnet_20240620,64.0,reasoning_average,livebench_240701,[],reasoning | |
| 53,gpt_4o_2024_05_13,55.0,reasoning_average,livebench_240701,[],reasoning | |
| 54,gpt_4_turbo_2024_04_09,54.0,reasoning_average,livebench_240701,[],reasoning | |
| 55,gpt_4_1106_preview,52.0,reasoning_average,livebench_240701,[],reasoning | |
| 56,claude_3_opus_20240229,41.0,reasoning_average,livebench_240701,[],reasoning | |
| 57,gpt_4_0125_preview,48.0,reasoning_average,livebench_240701,[],reasoning | |
| 58,deepseek_coder_v2,49.0,reasoning_average,livebench_240701,[],reasoning | |
| 59,gemini_1.5_pro_api_0514,33.0,reasoning_average,livebench_240701,[],reasoning | |
| 60,gemma_2_27b_it,31.0,reasoning_average,livebench_240701,[],reasoning | |
| 61,gemini_1.5_flash_api_0514,30.0,reasoning_average,livebench_240701,[],reasoning | |
| 62,qwen2_72b_instruct,42.0,reasoning_average,livebench_240701,[],reasoning | |
| 63,acm_rewrite_qwen2_72b_chat,37.0,reasoning_average,livebench_240701,[],reasoning | |
| 64,mistral_large_2402,35.0,reasoning_average,livebench_240701,[],reasoning | |
| 65,deepseek_chat_v2,29.0,reasoning_average,livebench_240701,[],reasoning | |
| 66,claude_3_sonnet_20240229,26.0,reasoning_average,livebench_240701,[],reasoning | |
| 67,meta_llama_3_70b_instruct,31.0,reasoning_average,livebench_240701,[],reasoning | |
| 68,claude_3_haiku_20240307,26.0,reasoning_average,livebench_240701,[],reasoning | |
| 69,mixtral_8x22b_instruct_v0.1,29.0,reasoning_average,livebench_240701,[],reasoning | |
| 70,gpt_3.5_turbo_0125,26.0,reasoning_average,livebench_240701,[],reasoning | |
| 71,gpt_3.5_turbo_1106,28.0,reasoning_average,livebench_240701,[],reasoning | |
| 72,command_r_plus,32.0,reasoning_average,livebench_240701,[],reasoning | |
| 73,mistral_small_2402,28.0,reasoning_average,livebench_240701,[],reasoning | |
| 74,gemma_2_9b_it,19.0,reasoning_average,livebench_240701,[],reasoning | |
| 75,phi_3_medium_4k_instruct,35.0,reasoning_average,livebench_240701,[],reasoning | |
| 76,phi_3_medium_128k_instruct,31.0,reasoning_average,livebench_240701,[],reasoning | |
| 77,deepseek_coder_v2_lite_instruct,22.0,reasoning_average,livebench_240701,[],reasoning | |
| 78,qwen1.5_110b_chat,26.0,reasoning_average,livebench_240701,[],reasoning | |
| 79,qwen1.5_72b_chat,21.0,reasoning_average,livebench_240701,[],reasoning | |
| 80,command_r,28.0,reasoning_average,livebench_240701,[],reasoning | |
| 81,phi_3_small_128k_instruct,36.0,reasoning_average,livebench_240701,[],reasoning | |
| 82,meta_llama_3_8b_instruct,25.0,reasoning_average,livebench_240701,[],reasoning | |
| 83,qwen2_7b_instruct,20.0,reasoning_average,livebench_240701,[],reasoning | |
| 84,phi_3_small_8k_instruct,23.0,reasoning_average,livebench_240701,[],reasoning | |
| 85,openhermes_2.5_mistral_7b,17.0,reasoning_average,livebench_240701,[],reasoning | |
| 86,mixtral_8x7b_instruct_v0.1,18.0,reasoning_average,livebench_240701,[],reasoning | |
| 87,mistral_7b_instruct_v0.2,13.0,reasoning_average,livebench_240701,[],reasoning | |
| 88,phi_3_mini_4k_instruct,19.0,reasoning_average,livebench_240701,[],reasoning | |
| 89,zephyr_7b_alpha,17.0,reasoning_average,livebench_240701,[],reasoning | |
| 90,phi_3_mini_128k_instruct,10.0,reasoning_average,livebench_240701,[],reasoning | |
| 91,zephyr_7b_beta,16.0,reasoning_average,livebench_240701,[],reasoning | |
| 92,deepseek_v2_lite_chat,13.0,reasoning_average,livebench_240701,[],reasoning | |
| 93,qwen1.5_7b_chat,13.0,reasoning_average,livebench_240701,[],reasoning | |
| 94,starling_lm_7b_beta,19.0,reasoning_average,livebench_240701,[],reasoning | |
| 95,vicuna_7b_v1.5_16k,15.0,reasoning_average,livebench_240701,[],reasoning | |
| 96,vicuna_7b_v1.5,12.0,reasoning_average,livebench_240701,[],reasoning | |
| 97,qwen1.5_4b_chat,13.0,reasoning_average,livebench_240701,[],reasoning | |
| 98,llama_2_7b_chat,5.0,reasoning_average,livebench_240701,[],reasoning | |
| 99,qwen2_1.5b_instruct,8.0,reasoning_average,livebench_240701,[],reasoning | |
| 100,yi_6b_chat,8.0,reasoning_average,livebench_240701,[],reasoning | |
| 101,qwen2_0.5b_instruct,3.0,reasoning_average,livebench_240701,[],reasoning | |
| 102,qwen1.5_1.8b_chat,5.0,reasoning_average,livebench_240701,[],reasoning | |
| 103,qwen1.5_0.5b_chat,4.0,reasoning_average,livebench_240701,[],reasoning | |
| 104,claude_3_5_sonnet_20240620,63.21,coding_average,livebench_240701,[],code | |
| 105,gpt_4o_2024_05_13,46.37,coding_average,livebench_240701,[],code | |
| 106,gpt_4_turbo_2024_04_09,47.05,coding_average,livebench_240701,[],code | |
| 107,gpt_4_1106_preview,44.37,coding_average,livebench_240701,[],code | |
| 108,claude_3_opus_20240229,40.05,coding_average,livebench_240701,[],code | |
| 109,gpt_4_0125_preview,44.05,coding_average,livebench_240701,[],code | |
| 110,deepseek_coder_v2,41.05,coding_average,livebench_240701,[],code | |
| 111,gemini_1.5_pro_api_0514,32.79,coding_average,livebench_240701,[],code | |
| 112,gemma_2_27b_it,36.74,coding_average,livebench_240701,[],code | |
| 113,gemini_1.5_flash_api_0514,39.05,coding_average,livebench_240701,[],code | |
| 114,qwen2_72b_instruct,31.79,coding_average,livebench_240701,[],code | |
| 115,acm_rewrite_qwen2_72b_chat,39.05,coding_average,livebench_240701,[],code | |
| 116,mistral_large_2402,26.84,coding_average,livebench_240701,[],code | |
| 117,deepseek_chat_v2,33.47,coding_average,livebench_240701,[],code | |
| 118,claude_3_sonnet_20240229,25.21,coding_average,livebench_240701,[],code | |
| 119,meta_llama_3_70b_instruct,20.95,coding_average,livebench_240701,[],code | |
| 120,claude_3_haiku_20240307,24.53,coding_average,livebench_240701,[],code | |
| 121,mixtral_8x22b_instruct_v0.1,33.11,coding_average,livebench_240701,[],code | |
| 122,gpt_3.5_turbo_0125,29.16,coding_average,livebench_240701,[],code | |
| 123,gpt_3.5_turbo_1106,26.84,coding_average,livebench_240701,[],code | |
| 124,command_r_plus,20.26,coding_average,livebench_240701,[],code | |
| 125,mistral_small_2402,24.21,coding_average,livebench_240701,[],code | |
| 126,gemma_2_9b_it,22.21,coding_average,livebench_240701,[],code | |
| 127,phi_3_medium_4k_instruct,20.58,coding_average,livebench_240701,[],code | |
| 128,phi_3_medium_128k_instruct,21.58,coding_average,livebench_240701,[],code | |
| 129,deepseek_coder_v2_lite_instruct,26.84,coding_average,livebench_240701,[],code | |
| 130,qwen1.5_110b_chat,22.21,coding_average,livebench_240701,[],code | |
| 131,qwen1.5_72b_chat,22.89,coding_average,livebench_240701,[],code | |
| 132,command_r,14.95,coding_average,livebench_240701,[],code | |
| 133,phi_3_small_128k_instruct,25.84,coding_average,livebench_240701,[],code | |
| 134,meta_llama_3_8b_instruct,18.26,coding_average,livebench_240701,[],code | |
| 135,qwen2_7b_instruct,29.21,coding_average,livebench_240701,[],code | |
| 136,phi_3_small_8k_instruct,19.58,coding_average,livebench_240701,[],code | |
| 137,openhermes_2.5_mistral_7b,11.63,coding_average,livebench_240701,[],code | |
| 138,mixtral_8x7b_instruct_v0.1,11.32,coding_average,livebench_240701,[],code | |
| 139,mistral_7b_instruct_v0.2,11.63,coding_average,livebench_240701,[],code | |
| 140,phi_3_mini_4k_instruct,14.95,coding_average,livebench_240701,[],code | |
| 141,zephyr_7b_alpha,11.32,coding_average,livebench_240701,[],code | |
| 142,phi_3_mini_128k_instruct,11.63,coding_average,livebench_240701,[],code | |
| 143,zephyr_7b_beta,8.32,coding_average,livebench_240701,[],code | |
| 144,deepseek_v2_lite_chat,8.63,coding_average,livebench_240701,[],code | |
| 145,qwen1.5_7b_chat,6.63,coding_average,livebench_240701,[],code | |
| 146,starling_lm_7b_beta,18.26,coding_average,livebench_240701,[],code | |
| 147,vicuna_7b_v1.5_16k,1.32,coding_average,livebench_240701,[],code | |
| 148,vicuna_7b_v1.5,1.0,coding_average,livebench_240701,[],code | |
| 149,qwen1.5_4b_chat,4.0,coding_average,livebench_240701,[],code | |
| 150,llama_2_7b_chat,0.0,coding_average,livebench_240701,[],code | |
| 151,qwen2_1.5b_instruct,5.63,coding_average,livebench_240701,[],code | |
| 152,yi_6b_chat,1.32,coding_average,livebench_240701,[],code | |
| 153,qwen2_0.5b_instruct,2.0,coding_average,livebench_240701,[],code | |
| 154,qwen1.5_1.8b_chat,0.0,coding_average,livebench_240701,[],code | |
| 155,qwen1.5_0.5b_chat,0.0,coding_average,livebench_240701,[],code | |
| 156,claude_3_5_sonnet_20240620,53.75,mathematics_average,livebench_240701,[],math | |
| 157,gpt_4o_2024_05_13,49.88,mathematics_average,livebench_240701,[],math | |
| 158,gpt_4_turbo_2024_04_09,48.99,mathematics_average,livebench_240701,[],math | |
| 159,gpt_4_1106_preview,47.55,mathematics_average,livebench_240701,[],math | |
| 160,claude_3_opus_20240229,46.54,mathematics_average,livebench_240701,[],math | |
| 161,gpt_4_0125_preview,42.75,mathematics_average,livebench_240701,[],math | |
| 162,deepseek_coder_v2,52.19,mathematics_average,livebench_240701,[],math | |
| 163,gemini_1.5_pro_api_0514,42.07,mathematics_average,livebench_240701,[],math | |
| 164,gemma_2_27b_it,36.23,mathematics_average,livebench_240701,[],math | |
| 165,gemini_1.5_flash_api_0514,38.54,mathematics_average,livebench_240701,[],math | |
| 166,qwen2_72b_instruct,43.44,mathematics_average,livebench_240701,[],math | |
| 167,acm_rewrite_qwen2_72b_chat,40.32,mathematics_average,livebench_240701,[],math | |
| 168,mistral_large_2402,32.2,mathematics_average,livebench_240701,[],math | |
| 169,deepseek_chat_v2,33.23,mathematics_average,livebench_240701,[],math | |
| 170,claude_3_sonnet_20240229,29.65,mathematics_average,livebench_240701,[],math | |
| 171,meta_llama_3_70b_instruct,32.31,mathematics_average,livebench_240701,[],math | |
| 172,claude_3_haiku_20240307,25.72,mathematics_average,livebench_240701,[],math | |
| 173,mixtral_8x22b_instruct_v0.1,26.94,mathematics_average,livebench_240701,[],math | |
| 174,gpt_3.5_turbo_0125,25.54,mathematics_average,livebench_240701,[],math | |
| 175,gpt_3.5_turbo_1106,28.13,mathematics_average,livebench_240701,[],math | |
| 176,command_r_plus,24.85,mathematics_average,livebench_240701,[],math | |
| 177,mistral_small_2402,26.76,mathematics_average,livebench_240701,[],math | |
| 178,gemma_2_9b_it,23.98,mathematics_average,livebench_240701,[],math | |
| 179,phi_3_medium_4k_instruct,27.54,mathematics_average,livebench_240701,[],math | |
| 180,phi_3_medium_128k_instruct,24.25,mathematics_average,livebench_240701,[],math | |
| 181,deepseek_coder_v2_lite_instruct,34.09,mathematics_average,livebench_240701,[],math | |
| 182,qwen1.5_110b_chat,25.58,mathematics_average,livebench_240701,[],math | |
| 183,qwen1.5_72b_chat,26.82,mathematics_average,livebench_240701,[],math | |
| 184,command_r,16.92,mathematics_average,livebench_240701,[],math | |
| 185,phi_3_small_128k_instruct,24.84,mathematics_average,livebench_240701,[],math | |
| 186,meta_llama_3_8b_instruct,17.58,mathematics_average,livebench_240701,[],math | |
| 187,qwen2_7b_instruct,25.83,mathematics_average,livebench_240701,[],math | |
| 188,phi_3_small_8k_instruct,24.15,mathematics_average,livebench_240701,[],math | |
| 189,openhermes_2.5_mistral_7b,20.1,mathematics_average,livebench_240701,[],math | |
| 190,mixtral_8x7b_instruct_v0.1,18.97,mathematics_average,livebench_240701,[],math | |
| 191,mistral_7b_instruct_v0.2,16.04,mathematics_average,livebench_240701,[],math | |
| 192,phi_3_mini_4k_instruct,19.88,mathematics_average,livebench_240701,[],math | |
| 193,zephyr_7b_alpha,9.61,mathematics_average,livebench_240701,[],math | |
| 194,phi_3_mini_128k_instruct,21.48,mathematics_average,livebench_240701,[],math | |
| 195,zephyr_7b_beta,11.23,mathematics_average,livebench_240701,[],math | |
| 196,deepseek_v2_lite_chat,11.99,mathematics_average,livebench_240701,[],math | |
| 197,qwen1.5_7b_chat,12.86,mathematics_average,livebench_240701,[],math | |
| 198,starling_lm_7b_beta,13.82,mathematics_average,livebench_240701,[],math | |
| 199,vicuna_7b_v1.5_16k,6.61,mathematics_average,livebench_240701,[],math | |
| 200,vicuna_7b_v1.5,4.33,mathematics_average,livebench_240701,[],math | |
| 201,qwen1.5_4b_chat,7.08,mathematics_average,livebench_240701,[],math | |
| 202,llama_2_7b_chat,4.78,mathematics_average,livebench_240701,[],math | |
| 203,qwen2_1.5b_instruct,7.16,mathematics_average,livebench_240701,[],math | |
| 204,yi_6b_chat,7.14,mathematics_average,livebench_240701,[],math | |
| 205,qwen2_0.5b_instruct,4.22,mathematics_average,livebench_240701,[],math | |
| 206,qwen1.5_1.8b_chat,2.14,mathematics_average,livebench_240701,[],math | |
| 207,qwen1.5_0.5b_chat,3.39,mathematics_average,livebench_240701,[],math | |
| 208,claude_3_5_sonnet_20240620,56.74,data_analysis_average,livebench_240701,[],knowledge | |
| 209,gpt_4o_2024_05_13,52.41,data_analysis_average,livebench_240701,[],knowledge | |
| 210,gpt_4_turbo_2024_04_09,51.32,data_analysis_average,livebench_240701,[],knowledge | |
| 211,gpt_4_1106_preview,51.33,data_analysis_average,livebench_240701,[],knowledge | |
| 212,claude_3_opus_20240229,54.32,data_analysis_average,livebench_240701,[],knowledge | |
| 213,gpt_4_0125_preview,54.06,data_analysis_average,livebench_240701,[],knowledge | |
| 214,deepseek_coder_v2,38.25,data_analysis_average,livebench_240701,[],knowledge | |
| 215,gemini_1.5_pro_api_0514,52.81,data_analysis_average,livebench_240701,[],knowledge | |
| 216,gemma_2_27b_it,43.58,data_analysis_average,livebench_240701,[],knowledge | |
| 217,gemini_1.5_flash_api_0514,44.03,data_analysis_average,livebench_240701,[],knowledge | |
| 218,qwen2_72b_instruct,26.24,data_analysis_average,livebench_240701,[],knowledge | |
| 219,acm_rewrite_qwen2_72b_chat,26.19,data_analysis_average,livebench_240701,[],knowledge | |
| 220,mistral_large_2402,42.55,data_analysis_average,livebench_240701,[],knowledge | |
| 221,deepseek_chat_v2,38.03,data_analysis_average,livebench_240701,[],knowledge | |
| 222,claude_3_sonnet_20240229,44.56,data_analysis_average,livebench_240701,[],knowledge | |
| 223,meta_llama_3_70b_instruct,42.41,data_analysis_average,livebench_240701,[],knowledge | |
| 224,claude_3_haiku_20240307,41.54,data_analysis_average,livebench_240701,[],knowledge | |
| 225,mixtral_8x22b_instruct_v0.1,30.33,data_analysis_average,livebench_240701,[],knowledge | |
| 226,gpt_3.5_turbo_0125,41.21,data_analysis_average,livebench_240701,[],knowledge | |
| 227,gpt_3.5_turbo_1106,41.7,data_analysis_average,livebench_240701,[],knowledge | |
| 228,command_r_plus,24.6,data_analysis_average,livebench_240701,[],knowledge | |
| 229,mistral_small_2402,31.88,data_analysis_average,livebench_240701,[],knowledge | |
| 230,gemma_2_9b_it,35.06,data_analysis_average,livebench_240701,[],knowledge | |
| 231,phi_3_medium_4k_instruct,31.63,data_analysis_average,livebench_240701,[],knowledge | |
| 232,phi_3_medium_128k_instruct,32.12,data_analysis_average,livebench_240701,[],knowledge | |
| 233,deepseek_coder_v2_lite_instruct,33.0,data_analysis_average,livebench_240701,[],knowledge | |
| 234,qwen1.5_110b_chat,31.45,data_analysis_average,livebench_240701,[],knowledge | |
| 235,qwen1.5_72b_chat,32.98,data_analysis_average,livebench_240701,[],knowledge | |
| 236,command_r,31.69,data_analysis_average,livebench_240701,[],knowledge | |
| 237,phi_3_small_128k_instruct,27.33,data_analysis_average,livebench_240701,[],knowledge | |
| 238,meta_llama_3_8b_instruct,23.33,data_analysis_average,livebench_240701,[],knowledge | |
| 239,qwen2_7b_instruct,28.75,data_analysis_average,livebench_240701,[],knowledge | |
| 240,phi_3_small_8k_instruct,27.5,data_analysis_average,livebench_240701,[],knowledge | |
| 241,openhermes_2.5_mistral_7b,26.92,data_analysis_average,livebench_240701,[],knowledge | |
| 242,mixtral_8x7b_instruct_v0.1,28.13,data_analysis_average,livebench_240701,[],knowledge | |
| 243,mistral_7b_instruct_v0.2,14.62,data_analysis_average,livebench_240701,[],knowledge | |
| 244,phi_3_mini_4k_instruct,14.67,data_analysis_average,livebench_240701,[],knowledge | |
| 245,zephyr_7b_alpha,17.4,data_analysis_average,livebench_240701,[],knowledge | |
| 246,phi_3_mini_128k_instruct,8.69,data_analysis_average,livebench_240701,[],knowledge | |
| 247,zephyr_7b_beta,15.75,data_analysis_average,livebench_240701,[],knowledge | |
| 248,deepseek_v2_lite_chat,18.19,data_analysis_average,livebench_240701,[],knowledge | |
| 249,qwen1.5_7b_chat,16.23,data_analysis_average,livebench_240701,[],knowledge | |
| 250,starling_lm_7b_beta,2.0,data_analysis_average,livebench_240701,[],knowledge | |
| 251,vicuna_7b_v1.5_16k,9.27,data_analysis_average,livebench_240701,[],knowledge | |
| 252,vicuna_7b_v1.5,2.67,data_analysis_average,livebench_240701,[],knowledge | |
| 253,qwen1.5_4b_chat,9.13,data_analysis_average,livebench_240701,[],knowledge | |
| 254,llama_2_7b_chat,0.0,data_analysis_average,livebench_240701,[],knowledge | |
| 255,qwen2_1.5b_instruct,10.01,data_analysis_average,livebench_240701,[],knowledge | |
| 256,yi_6b_chat,4.38,data_analysis_average,livebench_240701,[],knowledge | |
| 257,qwen2_0.5b_instruct,2.0,data_analysis_average,livebench_240701,[],knowledge | |
| 258,qwen1.5_1.8b_chat,3.33,data_analysis_average,livebench_240701,[],knowledge | |
| 259,qwen1.5_0.5b_chat,0.0,data_analysis_average,livebench_240701,[],knowledge | |
| 260,claude_3_5_sonnet_20240620,56.94,language_average,livebench_240701,[],other | |
| 261,gpt_4o_2024_05_13,53.94,language_average,livebench_240701,[],other | |
| 262,gpt_4_turbo_2024_04_09,45.26,language_average,livebench_240701,[],other | |
| 263,gpt_4_1106_preview,48.37,language_average,livebench_240701,[],other | |
| 264,claude_3_opus_20240229,51.72,language_average,livebench_240701,[],other | |
| 265,gpt_4_0125_preview,43.55,language_average,livebench_240701,[],other | |
| 266,deepseek_coder_v2,33.04,language_average,livebench_240701,[],other | |
| 267,gemini_1.5_pro_api_0514,38.25,language_average,livebench_240701,[],other | |
| 268,gemma_2_27b_it,32.4,language_average,livebench_240701,[],other | |
| 269,gemini_1.5_flash_api_0514,30.69,language_average,livebench_240701,[],other | |
| 270,qwen2_72b_instruct,29.21,language_average,livebench_240701,[],other | |
| 271,acm_rewrite_qwen2_72b_chat,30.03,language_average,livebench_240701,[],other | |
| 272,mistral_large_2402,28.74,language_average,livebench_240701,[],other | |
| 273,deepseek_chat_v2,32.29,language_average,livebench_240701,[],other | |
| 274,claude_3_sonnet_20240229,38.08,language_average,livebench_240701,[],other | |
| 275,meta_llama_3_70b_instruct,34.11,language_average,livebench_240701,[],other | |
| 276,claude_3_haiku_20240307,30.07,language_average,livebench_240701,[],other | |
| 277,mixtral_8x22b_instruct_v0.1,26.48,language_average,livebench_240701,[],other | |
| 278,gpt_3.5_turbo_0125,24.22,language_average,livebench_240701,[],other | |
| 279,gpt_3.5_turbo_1106,28.63,language_average,livebench_240701,[],other | |
| 280,command_r_plus,23.92,language_average,livebench_240701,[],other | |
| 281,mistral_small_2402,22.06,language_average,livebench_240701,[],other | |
| 282,gemma_2_9b_it,27.64,language_average,livebench_240701,[],other | |
| 283,phi_3_medium_4k_instruct,13.91,language_average,livebench_240701,[],other | |
| 284,phi_3_medium_128k_instruct,12.76,language_average,livebench_240701,[],other | |
| 285,deepseek_coder_v2_lite_instruct,10.64,language_average,livebench_240701,[],other | |
| 286,qwen1.5_110b_chat,13.22,language_average,livebench_240701,[],other | |
| 287,qwen1.5_72b_chat,11.37,language_average,livebench_240701,[],other | |
| 288,command_r,14.64,language_average,livebench_240701,[],other | |
| 289,phi_3_small_128k_instruct,12.28,language_average,livebench_240701,[],other | |
| 290,meta_llama_3_8b_instruct,18.72,language_average,livebench_240701,[],other | |
| 291,qwen2_7b_instruct,10.21,language_average,livebench_240701,[],other | |
| 292,phi_3_small_8k_instruct,14.96,language_average,livebench_240701,[],other | |
| 293,openhermes_2.5_mistral_7b,11.37,language_average,livebench_240701,[],other | |
| 294,mixtral_8x7b_instruct_v0.1,13.76,language_average,livebench_240701,[],other | |
| 295,mistral_7b_instruct_v0.2,9.05,language_average,livebench_240701,[],other | |
| 296,phi_3_mini_4k_instruct,7.1,language_average,livebench_240701,[],other | |
| 297,zephyr_7b_alpha,7.2,language_average,livebench_240701,[],other | |
| 298,phi_3_mini_128k_instruct,6.8,language_average,livebench_240701,[],other | |
| 299,zephyr_7b_beta,4.28,language_average,livebench_240701,[],other | |
| 300,deepseek_v2_lite_chat,9.2,language_average,livebench_240701,[],other | |
| 301,qwen1.5_7b_chat,6.18,language_average,livebench_240701,[],other | |
| 302,starling_lm_7b_beta,7.26,language_average,livebench_240701,[],other | |
| 303,vicuna_7b_v1.5_16k,7.92,language_average,livebench_240701,[],other | |
| 304,vicuna_7b_v1.5,8.66,language_average,livebench_240701,[],other | |
| 305,qwen1.5_4b_chat,5.8,language_average,livebench_240701,[],other | |
| 306,llama_2_7b_chat,6.86,language_average,livebench_240701,[],other | |
| 307,qwen2_1.5b_instruct,3.05,language_average,livebench_240701,[],other | |
| 308,yi_6b_chat,4.69,language_average,livebench_240701,[],other | |
| 309,qwen2_0.5b_instruct,2.8,language_average,livebench_240701,[],other | |
| 310,qwen1.5_1.8b_chat,3.16,language_average,livebench_240701,[],other | |
| 311,qwen1.5_0.5b_chat,2.88,language_average,livebench_240701,[],other | |
| 312,claude_3_5_sonnet_20240620,72.3,if_average,livebench_240701,[],other | |
| 313,gpt_4o_2024_05_13,72.17,if_average,livebench_240701,[],other | |
| 314,gpt_4_turbo_2024_04_09,71.39,if_average,livebench_240701,[],other | |
| 315,gpt_4_1106_preview,69.39,if_average,livebench_240701,[],other | |
| 316,claude_3_opus_20240229,70.87,if_average,livebench_240701,[],other | |
| 317,gpt_4_0125_preview,63.92,if_average,livebench_240701,[],other | |
| 318,deepseek_coder_v2,67.18,if_average,livebench_240701,[],other | |
| 319,gemini_1.5_pro_api_0514,67.2,if_average,livebench_240701,[],other | |
| 320,gemma_2_27b_it,67.37,if_average,livebench_240701,[],other | |
| 321,gemini_1.5_flash_api_0514,63.01,if_average,livebench_240701,[],other | |
| 322,qwen2_72b_instruct,68.27,if_average,livebench_240701,[],other | |
| 323,acm_rewrite_qwen2_72b_chat,65.0,if_average,livebench_240701,[],other | |
| 324,mistral_large_2402,68.19,if_average,livebench_240701,[],other | |
| 325,deepseek_chat_v2,64.34,if_average,livebench_240701,[],other | |
| 326,claude_3_sonnet_20240229,65.0,if_average,livebench_240701,[],other | |
| 327,meta_llama_3_70b_instruct,63.5,if_average,livebench_240701,[],other | |
| 328,claude_3_haiku_20240307,64.03,if_average,livebench_240701,[],other | |
| 329,mixtral_8x22b_instruct_v0.1,63.17,if_average,livebench_240701,[],other | |
| 330,gpt_3.5_turbo_0125,60.47,if_average,livebench_240701,[],other | |
| 331,gpt_3.5_turbo_1106,51.53,if_average,livebench_240701,[],other | |
| 332,command_r_plus,71.51,if_average,livebench_240701,[],other | |
| 333,mistral_small_2402,63.91,if_average,livebench_240701,[],other | |
| 334,gemma_2_9b_it,61.55,if_average,livebench_240701,[],other | |
| 335,phi_3_medium_4k_instruct,53.3,if_average,livebench_240701,[],other | |
| 336,phi_3_medium_128k_instruct,56.15,if_average,livebench_240701,[],other | |
| 337,deepseek_coder_v2_lite_instruct,48.34,if_average,livebench_240701,[],other | |
| 338,qwen1.5_110b_chat,55.26,if_average,livebench_240701,[],other | |
| 339,qwen1.5_72b_chat,58.25,if_average,livebench_240701,[],other | |
| 340,command_r,57.16,if_average,livebench_240701,[],other | |
| 341,phi_3_small_128k_instruct,36.88,if_average,livebench_240701,[],other | |
| 342,meta_llama_3_8b_instruct,57.14,if_average,livebench_240701,[],other | |
| 343,qwen2_7b_instruct,44.74,if_average,livebench_240701,[],other | |
| 344,phi_3_small_8k_instruct,48.24,if_average,livebench_240701,[],other | |
| 345,openhermes_2.5_mistral_7b,52.78,if_average,livebench_240701,[],other | |
| 346,mixtral_8x7b_instruct_v0.1,44.81,if_average,livebench_240701,[],other | |
| 347,mistral_7b_instruct_v0.2,51.65,if_average,livebench_240701,[],other | |
| 348,phi_3_mini_4k_instruct,40.05,if_average,livebench_240701,[],other | |
| 349,zephyr_7b_alpha,52.79,if_average,livebench_240701,[],other | |
| 350,phi_3_mini_128k_instruct,49.65,if_average,livebench_240701,[],other | |
| 351,zephyr_7b_beta,48.32,if_average,livebench_240701,[],other | |
| 352,deepseek_v2_lite_chat,41.83,if_average,livebench_240701,[],other | |
| 353,qwen1.5_7b_chat,44.12,if_average,livebench_240701,[],other | |
| 354,starling_lm_7b_beta,38.32,if_average,livebench_240701,[],other | |
| 355,vicuna_7b_v1.5_16k,42.12,if_average,livebench_240701,[],other | |
| 356,vicuna_7b_v1.5,41.75,if_average,livebench_240701,[],other | |
| 357,qwen1.5_4b_chat,27.75,if_average,livebench_240701,[],other | |
| 358,llama_2_7b_chat,44.88,if_average,livebench_240701,[],other | |
| 359,qwen2_1.5b_instruct,25.9,if_average,livebench_240701,[],other | |
| 360,yi_6b_chat,27.22,if_average,livebench_240701,[],other | |
| 361,qwen2_0.5b_instruct,26.63,if_average,livebench_240701,[],other | |
| 362,qwen1.5_1.8b_chat,22.9,if_average,livebench_240701,[],other | |
| 363,qwen1.5_0.5b_chat,21.3,if_average,livebench_240701,[],other | |